test: update README of v2 sample test (#7121)

* test: update README of v2 sample test

* address review comments by @zijianjoy

* use absolute paths, suggested by @zijianjoy

* Update README.md
This commit is contained in:
Yuan (Bob) Gong 2021-12-30 16:22:54 +08:00 committed by GitHub
parent d60bc99bb6
commit ca6e05591d
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
7 changed files with 78 additions and 55 deletions

View File

@ -1,6 +1,7 @@
# Test Samples
# Sample Test
These samples are built for testing purposes only.
The [config.yaml](./config.yaml) holds test config for kubeflow-pipelines-samples-v2 test.
Refer to [V2 samples test documentation](https://github.com/kubeflow/pipelines/tree/master/v2/test) for more details.
## Test Samples
Pipeline samples in this folder are built for testing purposes only.

View File

@ -210,8 +210,6 @@ def _run_test(callback):
def main(
pipeline_root: Optional[str] = None, # example
host: Optional[str] = None,
external_host: Optional[str] = None,
launcher_v2_image: Optional[str] = None,
driver_image: Optional[str] = None,
experiment: str = 'v2_sample_test_samples',
@ -219,11 +217,10 @@ def _run_test(callback):
metadata_service_port: int = 8080,
):
"""Test file CLI entrypoint used by Fire.
To configure KFP endpoint, configure env vars following:
https://www.kubeflow.org/docs/components/pipelines/sdk/connect-api/#configure-sdk-client-by-environment-variables.
KFP UI endpoint can be configured by KF_PIPELINES_UI_ENDPOINT env var.
:param host: Hostname pipelines can access, defaults to 'http://ml-pipeline:8888'.
:type host: str, optional
:param external_host: External hostname users can access from their browsers.
:type external_host: str, optional
:param pipeline_root: pipeline root that holds intermediate
artifacts, example gs://your-bucket/path/to/workdir.
:type pipeline_root: str, optional
@ -239,12 +236,6 @@ def _run_test(callback):
:type metadata_service_port: int, optional
"""
# Default to env values, so people can set up their env and run these
# tests without specifying any commands.
if host is None:
host = os.getenv('KFP_HOST', 'http://ml-pipeline:8888')
if external_host is None:
external_host = host
if pipeline_root is None:
pipeline_root = os.getenv('KFP_PIPELINE_ROOT')
if not pipeline_root:
@ -253,19 +244,28 @@ def _run_test(callback):
logger.warning(
f'KFP_OUTPUT_DIRECTORY env var is left for backward compatibility, please use KFP_PIPELINE_ROOT instead.'
)
logger.info(f'KFP_PIPELINE_ROOT={pipeline_root}')
if metadata_service_host is None:
metadata_service_host = os.getenv('METADATA_GRPC_SERVICE_HOST',
'metadata-grpc-service')
logger.info(f'METADATA_GRPC_SERVICE_HOST={metadata_service_host}')
if launcher_v2_image is None:
launcher_v2_image = os.getenv('KFP_LAUNCHER_V2_IMAGE')
if not launcher_v2_image:
raise Exception("launcher_v2_image is empty")
logger.info(f'KFP_LAUNCHER_V2_IMAGE={launcher_v2_image}')
if driver_image is None:
driver_image = os.getenv('KFP_DRIVER_IMAGE')
if not driver_image:
raise Exception("driver_image is empty")
client = kfp.Client(host=host)
logger.info(f'KFP_DRIVER_IMAGE={driver_image}')
client = kfp.Client()
# TODO(Bobgy): avoid using private fields when getting loaded config
kfp_endpoint = client._existing_config.host
kfp_ui_endpoint = client._uihost
logger.info(f'KF_PIPELINES_ENDPOINT={kfp_endpoint}')
if kfp_ui_endpoint != kfp_endpoint:
logger.info(f'KF_PIPELINES_UI_ENDPOINT={kfp_ui_endpoint}')
def run_pipeline(
pipeline_func: Optional[Callable],
@ -347,7 +347,7 @@ def _run_test(callback):
# There is no run_result when dry_run.
return
print("Run details page URL:")
print(f"{external_host}/#/runs/details/{run_result.run_id}")
print(f"{kfp_ui_endpoint}/#/runs/details/{run_result.run_id}")
run_detail = run_result.wait_for_run_completion(timeout)
# Hide detailed information for pretty printing
workflow_spec = run_detail.run.pipeline_spec.workflow_manifest
@ -362,15 +362,8 @@ def _run_test(callback):
# When running locally, port forward MLMD grpc service to localhost:8080 by:
#
# ```bash
# NAMESPACE=kubeflow kubectl port-forward svc/metadata-grpc-service 8080:8080 -n $NAMESPACE
# ```
#
# Then you can uncomment the following config instead.
# mlmd_connection_config = metadata_store_pb2.MetadataStoreClientConfig(
# host='localhost',
# port=8080,
# )
# 1. NAMESPACE=kubeflow kubectl port-forward svc/metadata-grpc-service 8080:8080 -n $NAMESPACE
# 2. Configure env var METADATA_GRPC_SERVICE_HOST=localhost.
mlmd_connection_config = metadata_store_pb2.MetadataStoreClientConfig(
host=metadata_service_host,
port=metadata_service_port,

View File

@ -101,10 +101,8 @@ class Client(object):
host: The host name to use to talk to Kubeflow Pipelines. If not set, the in-cluster
service DNS name will be used, which only works if the current environment is a pod
in the same cluster (such as a Jupyter instance spawned by Kubeflow's
JupyterHub). If you have a different connection to cluster, such as a kubectl
proxy connection, then set it to something like "127.0.0.1:8080/pipeline.
If you connect to an IAP enabled cluster, set it to
https://<your-deployment>.endpoints.<your-project>.cloud.goog/pipeline".
JupyterHub).
Set the host based on https://www.kubeflow.org/docs/components/pipelines/sdk/connect-api/.
client_id: The client ID used by Identity-Aware Proxy.
namespace: The namespace where the kubeflow pipeline system is run.
other_client_id: The client ID used to obtain the auth codes and refresh tokens.

View File

@ -15,21 +15,21 @@ all: sample-test
sample-test: upload
# The -u flag makes python output unbuffered, so that we can see real-time logs.
# Reference: https://stackoverflow.com/a/107717
python -u sample_test.py \
export KF_PIPELINES_ENDPOINT=$(HOST) \
&& python -u sample_test.py \
--samples_config samples/test/config.yaml \
--context $(GCS_ROOT)/src/context.tar.gz \
--host $(HOST) \
--gcs_root $(GCS_ROOT)/data \
--gcr_root $(GCR_ROOT) \
--kfp_package_path "$(KFP_PACKAGE_PATH)"
.PHONY: integration-test
integration-test: upload
python -u sample_test.py \
export KF_PIPELINES_ENDPOINT=$(HOST) \
&& python -u sample_test.py \
--samples_config samples/test/config-integration.yaml \
--timeout_mins 60 \
--context $(GCS_ROOT)/src/context.tar.gz \
--host $(HOST) \
--gcs_root $(GCS_ROOT)/data \
--gcr_root $(GCR_ROOT) \
--kfp_package_path "$(KFP_PACKAGE_PATH)"

View File

@ -1,19 +1,26 @@
# Kubeflow Pipelines Sample Test Infra V2
The following tests are running on sample test infra v2:
* kubeflow-pipelines-samples-v2
* kubeflow-pipelines-integration-v2
Note, the sample test only runs on Google Cloud at the moment. Contributions
are welcome if you want to adapt it to other platforms.
Quick Links:
* [prowjob config](https://github.com/GoogleCloudPlatform/oss-test-infra/blob/48b09567c8df28fab2d3f2fb6df86defa12207fb/prow/prowjobs/kubeflow/pipelines/kubeflow-pipelines-presubmits.yaml#L184-L192)
* [prowjob config](https://github.com/GoogleCloudPlatform/oss-test-infra/blob/8e2b1e0b57d0bf7adf8e9f3cef6a98af25012412/prow/prowjobs/kubeflow/pipelines/kubeflow-pipelines-presubmits.yaml#L185-L203)
* [past prow jobs](https://oss-prow.knative.dev/job-history/gs/oss-prow/pr-logs/directory/kubeflow-pipelines-samples-v2)
* [sample test config](../../samples/test/config.yaml)
* Sample test configs
* [kubeflow-pipelines-samples-v2 test config](/samples/test/config.yaml)
* [kubeflow-pipelines-integration-v2 test config](/samples/test/config-integration.yaml)
* [KFP test cluster hostname](https://github.com/kubeflow/testing/blob/master/test-infra/kfp/endpoint)
* [Infra as Code configuration for kfp-ci project](https://github.com/kubeflow/testing/tree/master/test-infra/kfp).
## How to access the KFP UI running these tests?
kubeflow-pipelines-sample-v2 test pipeline runs on [kfp-standalone-1 cluster](https://console.cloud.google.com/kubernetes/clusters/details/us-central1/kfp-standalone-1/details?folder=&organizationId=&project=kfp-ci),
Test Kubeflow Pipelines run on [kfp-standalone-1 cluster](https://console.cloud.google.com/kubernetes/clusters/details/us-central1/kfp-standalone-1/details?folder=&organizationId=&project=kfp-ci),
`kfp-ci` project, `kubeflow.org` organization.
The test script prints the KFP host URL in its logs. You need to have permission to
@ -23,9 +30,11 @@ You need to join [Kubeflow ci-team google group](https://github.com/kubeflow/int
has very wide permissions to test infra, so access will only be granted to core
developers.
Currently, it's not possible to grant KFP UI only permission, but we can grant
<!--
TODO(Bobgy): Currently, it's not possible to grant KFP UI only permission, but we can consider granting
such access to [Kubeflow ci-viewer google group](https://github.com/kubeflow/internal-acls/blob/master/google_groups/groups/ci-viewer.yaml).
Contact @Bobgy if you have such a need.
Contact @zijianjoy if you have such a need.
-->
## How to run the entire sample test suite in your own KFP?
@ -61,15 +70,28 @@ as expected.
For why the caveat exists, refer to context rule in [Makefile](./Makefile).
Run integration test by:
```bash
make integration-test
```
However, integration tests are configured to run on the kfp-ci project, so modify the tests locally with your own configs:
* [parameterized_tfx_oss_test.py](/samples/core/parameterized_tfx_oss/parameterized_tfx_oss_test.py)
* [dataflow_test.py](/samples/core/dataflow/dataflow_test.py)
## How to develop one single sample?
One-time environment configurations:
```bash
# These env vars are loaded by default; we recommend configuring them in your
# .bashrc or .zshrc
export KFP_HOST=https://your.KFP.host
export KF_PIPELINES_ENDPOINT=https://your.KFP.host
export KFP_PIPELINE_ROOT=gs://your-bucket/path/to/output/dir
export METADATA_GRPC_SERVICE_HOST=localhost
export PATH="$HOME/bin:$PATH" # Some CLI tools will be installed to ~/bin.
export PATH="$HOME/bin:$PATH" # The KFP v2 backend compiler CLI tool will be installed to ~/bin by make install-compiler
# optional, when you want to override images to your dev project
# export KFP_LAUNCHER_V2_IMAGE=gcr.io/your-project/dev/kfp-launcher-v2:latest
# export KFP_DRIVER_IMAGE=gcr.io/your-project/kfp-driver:latest
@ -85,11 +107,18 @@ cd "${REPO_ROOT}/v2"
# Note, when you update backend compiler code, you need to run this again!
make install-compiler
# Note, for tests that use metadata grpc api, you should port-forward it locally in a separate terminal by:
# Note: v2 tests use the metadata gRPC API; you need to port-forward it locally in a separate terminal by:
cd "${REPO_ROOT}/v2/test"
make mlmd-port-forward
# To run a single sample test:
# Install python dependencies
cd "${REPO_ROOT}/v2/test"
pip install -r requirements.txt
```
To run a single sample test:
```bash
cd "${REPO_ROOT}"
# if you have a sample test at samples/path/to/your/sample_test.py
python -m samples.path.to.your.sample_test
@ -99,16 +128,19 @@ python -m samples.path.to.your.sample_test --help
## How to add a sample to this sample test?
Edit [samples/test/config.yaml](../../samples/test/config.yaml) and add your own sample.
Edit [samples/test/config.yaml](/samples/test/config.yaml) and add your own sample.
You can also add other samples not in the `samples/test` folder.
Your sample test needs to conform to the standard interface in
[components/run_sample.yaml](components/run_sample.yaml). You can refer to
existing [sample tests](../../samples/test) for how to implement the interface.
existing [sample tests](/samples/test) for how to implement the interface.
## How can a sample verify MLMD status of a run?
Some samples can be used as examples for various cases:
Refer to [an existing test](../../samples/v2/hello_world_test.py).
* Pipeline from a notebook, [multiple_outputs_test.py](/samples/core/multiple_outputs/multiple_outputs_test.py).
* A sample that does not submit a pipeline, [dsl_static_type_checking_test.py](/samples/core/dsl_static_type_checking/dsl_static_type_checking_test.py).
* V2 pipeline and verification, [hello_world_test.py](/samples/v2/hello_world_test.py).
* V2 pipeline and control flow, [condition_test.py](/samples/core/condition/condition_test.py).
## FAQs

View File

@ -32,8 +32,8 @@ implementation:
backend_compiler_path="$0"
sample_path="$1"
output_dir="$2"
host="$3"
external_host="$4"
export KF_PIPELINES_ENDPOINT="$3"
export KF_PIPELINES_UI_ENDPOINT="$4"
launcher_v2_image="$5"
driver_image="$6"
@ -45,8 +45,6 @@ implementation:
-u \
-m "$sample_path" \
--pipeline_root "$output_dir" \
--host "$host" \
--external_host "$external_host" \
--launcher_v2_image "$launcher_v2_image" \
--driver_image "$driver_image"

View File

@ -104,7 +104,6 @@ def v2_sample_test(
def main(
context: str,
host: str,
gcr_root: str,
gcs_root: str,
experiment: str = 'v2_sample_test',
@ -119,7 +118,9 @@ def main(
with open(samples_config_path, 'r') as stream:
samples_config_content = yaml.safe_load(stream)
client = kfp.Client(host=host)
client = kfp.Client()
# TODO(Bobgy): avoid using private fields when getting loaded config
host = client._existing_config.host
client.create_experiment(
name=experiment,
description='An experiment with Kubeflow Pipelines v2 sample test runs.'