Delete the notebook tests because they are outdated. (#808)

* Delete the notebook tests because they are outdated. * We have rewritten the test infra for notebooks to use Tekton. See: https://github.com/kubeflow/testing/blob/master/tekton/templates/pipelines/notebook-test-pipeline.yaml https://github.com/kubeflow/examples/tree/master/py/kubeflow/examples/notebook_tests * We are also no longer regularly deploying the v1 clusters; we are now using blueprints so that's why the tests can no longer get credentials * * Add the mnist notebook test as a postsubmit and periodic test. * Fix.
2020-07-07 01:23:58 -07:00 · 2020-07-07 01:23:58 -07:00 · c880fdaa80
parent 10b34b8dc8
commit c880fdaa80
8 changed files with 51 additions and 474 deletions
--- a/mnist/tests/mnist-nb-pipeline-run.yaml
+++ b/mnist/tests/mnist-nb-pipeline-run.yaml
@ -0,0 +1,47 @@
+# Demo on how to write a pipeline run.
+
+apiVersion: tekton.dev/v1alpha1
+kind: PipelineRun
+metadata:
+  generateName: mnist-
+  namespace: kf-ci
+  labels:
+    pipeline: mnist-notebook
+spec:
+  params:
+  # test-target-name, artifacts-gcs, and junit-path are required for
+  # all the PipelineRuns. For tests spawned by Prow, values of these
+  # params will be injected on the fly.
+  - name: test-target-name
+    value: manual-testinig
+  - name: artifacts-gcs
+    value: gs://kubeflow-ci_temp/jlewi_mnist_testing/2020-0619
+  - name: junit-path
+    value: artifacts/junit_manual-testing
+  - name: testing-cluster-pattern
+    value: 'kf-vbp-.*'
+  - name: testing-cluster-location
+    value: 'us-central1-c'
+  - name: notebook-output
+    value: gs://kubeflow-ci-deployment_ci-temp/mnist_test
+  - name: notebook-path
+    value: mnist/mnist_gcp.ipynb
+  resources:
+  - name: notebook-repo
+    resourceSpec:
+      type: git
+      params:
+      - name: url
+        value: https://github.com/kubeflow/examples.git
+      - name: revision
+        value: master
+  # The image we want to build
+  - name: image
+    resourceSpec:
+      type: image
+      params:
+      - name: url
+        value: gcr.io/kubeflow-ci-deployment/mnist-test
+  pipelineRef:
+    name: notebook-test
+  serviceAccountName: kf-ci
--- a/prow_config.yaml
+++ b/prow_config.yaml
@ -45,31 +45,8 @@ workflows:
    include_dirs:
      - pytorch_mnist/*

-  # E2E test for various notebooks
-  # New notebooks can just add a step to the workflow
-  - py_func: kubeflow.examples.create_e2e_workflow.create_workflow
-    name: notebooks
+  - tekton_run: kubeflow/examples/tests/mnist-nb-pipeline-run.yaml
+    name: mnist-notebook
    job_types:
-      - periodic
-      - presubmit
-      - postsubmit
-    include_dirs:
-      - xgboost_synthetic/*
-      - mnist/*
-      - py/kubeflow/examples/notebook_tests
-      - py/kubeflow/examples/create_e2e_workflow.py
-
-  # E2E test for various notebooks
-  # New notebooks can just add a step to the workflow
-  - py_func: kubeflow.examples.create_e2e_workflow.create_workflow
-    name: notebooks-v1
-    job_types:
-      - periodic
-      - presubmit
-      - postsubmit
-    include_dirs:
-      - xgboost_synthetic/*
-      - mnist/*
-      - py/kubeflow/examples/create_e2e_workflow.py
-    kwargs:
-      cluster_pattern: kf-v1-(?!n\d\d)
+    - periodic
+    - postsubmit
--- a/py/kubeflow/examples/notebook_tests/conftest.py
+++ b/py/kubeflow/examples/notebook_tests/conftest.py
@ -1,56 +0,0 @@
-import pytest
-
-def pytest_addoption(parser):
-  parser.addoption(
-    "--name", help="Name for the job. If not specified one was created "
-    "automatically", type=str, default="")
-  parser.addoption(
-    "--namespace", help=("The namespace to run in. This should correspond to"
-                         "a namespace associated with a Kubeflow namespace."),
-                   type=str,
-    default="kubeflow-kf-ci-v1-user")
-  parser.addoption(
-    "--image", help="Notebook image to use", type=str,
-    default="gcr.io/kubeflow-images-public/"
-            "tensorflow-1.15.2-notebook-cpu:1.0.0")
-  parser.addoption(
-    "--repos", help="The repos to checkout; leave blank to use defaults",
-    type=str, default="")
-  parser.addoption(
-    "--notebook_path", help=("Path to the testing notebook file, starting from"
-                             "the base directory of examples repository."),
-    type=str, default="")
-  parser.addoption(
-    "--test-target-name", help=("Test target name, used as junit class name."),
-    type=str, default="")
-  parser.addoption(
-    "--artifacts-gcs", help=("GCS to upload artifacts to."),
-    type=str, default="")
-
-@pytest.fixture
-def name(request):
-  return request.config.getoption("--name")
-
-@pytest.fixture
-def namespace(request):
-  return request.config.getoption("--namespace")
-
-@pytest.fixture
-def image(request):
-  return request.config.getoption("--image")
-
-@pytest.fixture
-def repos(request):
-  return request.config.getoption("--repos")
-
-@pytest.fixture
-def notebook_path(request):
-  return request.config.getoption("--notebook_path")
-
-@pytest.fixture
-def test_target_name(request):
-  return request.config.getoption("--test-target-name")
-
-@pytest.fixture
-def artifacts_gcs(request):
-  return request.config.getoption("--artifacts-gcs")
--- a/py/kubeflow/examples/notebook_tests/execute_notebook.py
+++ b/py/kubeflow/examples/notebook_tests/execute_notebook.py
@ -1,83 +0,0 @@
-import argparse
-import tempfile
-import logging
-import os
-import subprocess
-
-logger = logging.getLogger(__name__)
-
-from google.cloud import storage
-from kubeflow.testing import util
-
-def prepare_env():
-  subprocess.check_call(["pip3", "install", "-Iv", "papermill==2.0.0"])
-  subprocess.check_call(["pip3", "install", "-U", "nbconvert"])
-  subprocess.check_call(["pip3", "install", "-U", "nbformat"])
-
-def execute_notebook(notebook_path, parameters=None):
-  import papermill #pylint: disable=import-error
-  temp_dir = tempfile.mkdtemp()
-  notebook_output_path = os.path.join(temp_dir, "out.ipynb")
-  papermill.execute_notebook(notebook_path, notebook_output_path,
-                             cwd=os.path.dirname(notebook_path),
-                             parameters=parameters,
-                             log_output=True)
-  return notebook_output_path
-
-def _upload_notebook_html(content, target):
-  gcs_client = storage.Client()
-  bucket_name, path = util.split_gcs_uri(target)
-
-  bucket = gcs_client.get_bucket(bucket_name)
-
-  logging.info("Uploading notebook to %s.", target)
-  blob = bucket.blob(path)
-  # Need to set content type so that if we browse in GCS we end up rendering
-  # as html.
-  blob.upload_from_string(content, content_type="text/html")
-
-def run_notebook_test(notebook_path, parameters=None):
-  import nbformat #pylint: disable=import-error
-  import nbconvert #pylint: disable=import-error
-
-  output_path = execute_notebook(notebook_path, parameters=parameters)
-
-  with open(output_path, "r") as hf:
-    actual_output = hf.read()
-
-  nb = nbformat.reads(actual_output, as_version=4)
-  html_exporter = nbconvert.HTMLExporter()
-  (html_output, _) = html_exporter.from_notebook_node(nb)
-  gcs_path = os.getenv("OUTPUT_GCS")
-  _upload_notebook_html(html_output, gcs_path)
-
-class NotebookExecutor:
-  @staticmethod
-  def test(notebook_path):
-    """Test a notebook.
-
-    Args:
-      notebook_path: Absolute path of the notebook.
-    """
-    prepare_env()
-    FILE_DIR = os.path.dirname(__file__)
-
-    run_notebook_test(notebook_path)
-
-if __name__ == "__main__":
-  logging.basicConfig(level=logging.INFO,
-                      format=('%(levelname)s|%(asctime)s'
-                              '|%(message)s|%(pathname)s|%(lineno)d|'),
-                      datefmt='%Y-%m-%dT%H:%M:%S',
-                      )
-
-  # fire isn't available in the notebook image which is why we aren't
-  # using it.
-  parser = argparse.ArgumentParser()
-  parser.add_argument(
-    "--notebook_path", default="", type=str, help=("Path to the notebook"))
-
-  args = parser.parse_args()
-
-  NotebookExecutor.test(args.notebook_path)
-
--- a/py/kubeflow/examples/notebook_tests/job.yaml
+++ b/py/kubeflow/examples/notebook_tests/job.yaml
@ -1,55 +0,0 @@
-# A batch job to run a notebook using papermill.
-# The YAML is modified by nb_test_util.py to generate a Job specific
-# to a notebook.
-#
-# TODO(jlewi): We should switch to using Tekton
-apiVersion: batch/v1
-kind: Job
-metadata:
-  name: nb-test
-  labels:
-    app: nb-test
-spec:
-  backoffLimit: 1
-  template:
-    metadata:
-      annotations:
-        # TODO(jlewi): Do we really want to disable sidecar injection
-        # in the test? Would it be better to use istio to mimic what happens
-        # in notebooks?
-        sidecar.istio.io/inject: "false"
-      labels:
-        app: nb-test
-    spec:
-      restartPolicy: Never
-      securityContext:
-        runAsUser: 0
-      initContainers:
-      # This init container checks out the source code.
-      - command:
-        - /usr/local/bin/checkout_repos.sh
-        - --repos=kubeflow/examples@$(CHECK_TAG)
-        - --src_dir=/src
-        name: checkout
-        image: gcr.io/kubeflow-ci/test-worker:v20190802-c6f9140-e3b0c4
-        volumeMounts:
-        - mountPath: /src
-          name: src
-      containers:
-      - env:
-        - name: PYTHONPATH
-          value: /src/kubeflow/examples/py/
-        name: executing-notebooks
-        image: execute-image
-        # Command will get overwritten by nb_test_util.py
-        command: ["python3", "-m",
-                  "kubeflow.examples.notebook_tests.execute_notebook",
-                  "test", "/src/kubeflow/examples/mnist/mnist_gcp.ipynb"]
-        workingDir: /src/kubeflow/examples/py/kubeflow/examples/notebook_tests
-        volumeMounts:
-        - mountPath: /src
-          name: src
-      serviceAccount: default-editor
-      volumes:
-      - name: src
-        emptyDir: {}
--- a/py/kubeflow/examples/notebook_tests/mnist_gcp_test.py
+++ b/py/kubeflow/examples/notebook_tests/mnist_gcp_test.py
@ -1,33 +0,0 @@
-import datetime
-import logging
-import os
-import uuid
-
-import pytest
-
-from kubeflow.examples.notebook_tests import nb_test_util
-from kubeflow.testing import util
-
-def test_mnist_gcp(record_xml_attribute, name, namespace, # pylint: disable=too-many-branches,too-many-statements
-                   repos, image):
-  '''Generate Job and summit.'''
-  util.set_pytest_junit(record_xml_attribute, "test_mnist")
-
-  if not name:
-    name = "mnist-" + datetime.datetime.now().strftime("%H%M%S") + "-"
-    name = name + uuid.uuid4().hex[0:3]
-
-  util.set_pytest_junit(record_xml_attribute, "test_mnist_gcp")
-
-  notebook_path = "kubeflow/examples/mnist/mnist_gcp.ipynb"
-  nb_test_util.run_papermill_job(notebook_path, name, namespace, repos, image)
-
-
-if __name__ == "__main__":
-  logging.basicConfig(level=logging.INFO,
-                      format=('%(levelname)s|%(asctime)s'
-                              '|%(pathname)s|%(lineno)d| %(message)s'),
-                      datefmt='%Y-%m-%dT%H:%M:%S',
-                      )
-  logging.getLogger().setLevel(logging.INFO)
-  pytest.main()
--- a/py/kubeflow/examples/notebook_tests/nb_test_util.py
+++ b/py/kubeflow/examples/notebook_tests/nb_test_util.py
@ -1,184 +0,0 @@
-"""Some utitilies for running notebook tests."""
-
-import datetime
-import logging
-import os
-from urllib.parse import urlencode
-import uuid
-import yaml
-
-from google.cloud import storage
-from kubernetes import client as k8s_client
-from kubeflow.testing import argo_build_util
-from kubeflow.testing import prow_artifacts
-from kubeflow.testing import util
-
-# This is the bucket where the batch jobs will uploaded an HTML version of the
-# notebook will be written to. The K8s job is running in a Kubeflow cluster
-# so it needs to be a bucket that the kubeflow cluster can write to.
-# This is why we don't write directly to the bucket used for prow artifacts
-NB_BUCKET = "kubeflow-ci-deployment"
-PROJECT = "kbueflow-ci-deployment"
-
-def logs_for_job(project, job_name):
-  """Get a stack driver link for the job with the specified name."""
-  logs_filter = f"""resource.type="k8s_container"
-   labels."k8s-pod/job-name" = "{job_name}"
-"""
-
-  new_params = {"project": project,
-                # Logs for last 7 days
-                "interval": 'P7D',
-                "advancedFilter": logs_filter}
-
-  query = urlencode(new_params)
-
-  url = "https://console.cloud.google.com/logs/viewer?" + query
-
-  return url
-
-def run_papermill_job(notebook_path, name, namespace, # pylint: disable=too-many-branches,too-many-statements
-                      repos, image, artifacts_gcs="", test_target_name=""):
-  """Generate a K8s job to run a notebook using papermill
-
-  Args:
-    notebook_path: Path to the notebook. This should be in the form
-      "{REPO_OWNER}/{REPO}/path/to/notebook.ipynb"
-    name: Name for the K8s job
-    namespace: The namespace where the job should run.
-    repos: Which repos to checkout; if None or empty tries
-      to infer based on PROW environment variables
-    image: The docker image to run the notebook in.
-  """
-
-  util.maybe_activate_service_account()
-
-  with open("job.yaml") as hf:
-    job = yaml.load(hf)
-
-  if notebook_path.startswith("/"):
-    raise ValueError("notebook_path={0} should not start with /".format(
-      notebook_path))
-
-  # We need to checkout the correct version of the code
-  # in presubmits and postsubmits. We should check the environment variables
-  # for the prow environment variables to get the appropriate values.
-  # We should probably also only do that if the
-  # See
-  # https://github.com/kubernetes/test-infra/blob/45246b09ed105698aa8fb928b7736d14480def29/prow/jobs.md#job-environment-variables
-  if not repos:
-    repos = argo_build_util.get_repo_from_prow_env()
-    logging.info(f"Using repos {repos}")
-
-  if not repos:
-    raise ValueError("Could not get repos from prow environment variable "
-                     "and --repos isn't explicitly set")
-
-  repos += ",kubeflow/testing@HEAD"
-
-  logging.info("Repos set to %s", repos)
-  job["spec"]["template"]["spec"]["initContainers"][0]["command"] = [
-    "/usr/local/bin/checkout_repos.sh",
-    "--repos=" + repos,
-    "--src_dir=/src",
-    "--depth=all",
-  ]
-
-  job["spec"]["template"]["spec"]["containers"][0]["image"] = image
-
-  full_notebook_path = os.path.join("/src", notebook_path)
-  job["spec"]["template"]["spec"]["containers"][0]["command"] = [
-    "python3", "-m",
-    "kubeflow.examples.notebook_tests.execute_notebook",
-    "--notebook_path", full_notebook_path]
-
-  job["spec"]["template"]["spec"]["containers"][0][
-    "workingDir"] = os.path.dirname(full_notebook_path)
-
-  # The prow bucket to use for results/artifacts
-  prow_bucket = prow_artifacts.PROW_RESULTS_BUCKET
-
-  if artifacts_gcs:
-    prow_dir = os.path.join(artifacts_gcs, "artifacts")
-    if test_target_name:
-      prow_dir = os.path.join(prow_dir, test_target_name)
-    logging.info("Prow artifacts directory: %s", prow_dir)
-    prow_bucket, prow_path = util.split_gcs_uri(prow_dir)
-  elif os.getenv("REPO_OWNER") and os.getenv("REPO_NAME"):
-    # Running under prow
-    prow_dir = prow_artifacts.get_gcs_dir(prow_bucket)
-    logging.info("Prow artifacts dir: %s", prow_dir)
-    prow_dir = os.path.join(prow_dir, "artifacts")
-
-    if os.getenv("TEST_TARGET_NAME"):
-      prow_dir = os.path.join(
-        prow_dir, os.getenv("TEST_TARGET_NAME").lstrip("/"))
-    prow_bucket, prow_path = util.split_gcs_uri(prow_dir)
-
-  else:
-    prow_path = "notebook-test" + datetime.datetime.now().strftime("%H%M%S")
-    prow_path = prow_path + "-" + uuid.uuid4().hex[0:3]
-    prow_dir = util.to_gcs_uri(prow_bucket, prow_path)
-
-  prow_path = os.path.join(prow_path, name + ".html")
-  output_gcs = util.to_gcs_uri(NB_BUCKET, prow_path)
-
-  job["spec"]["template"]["spec"]["containers"][0]["env"] = [
-    {"name": "OUTPUT_GCS", "value": output_gcs},
-    {"name": "PYTHONPATH",
-     "value": "/src/kubeflow/testing/py:/src/kubeflow/examples/py"},
-  ]
-
-  logging.info("Notebook will be written to %s", output_gcs)
-  util.load_kube_config(persist_config=False)
-
-  if name:
-    job["metadata"]["name"] = name
-  else:
-    job["metadata"]["name"] = ("notebook-test-" +
-                               datetime.datetime.now().strftime("%H%M%S")
-                               + "-" + uuid.uuid4().hex[0:3])
-  name = job["metadata"]["name"]
-
-  job["metadata"]["namespace"] = namespace
-
-  # Create an API client object to talk to the K8s master.
-  api_client = k8s_client.ApiClient()
-  batch_api = k8s_client.BatchV1Api(api_client)
-
-  logging.info("Creating job:\n%s", yaml.dump(job))
-  actual_job = batch_api.create_namespaced_job(job["metadata"]["namespace"],
-                                               job)
-  logging.info("Created job %s.%s:\n%s", namespace, name,
-               yaml.safe_dump(actual_job.to_dict()))
-
-  logging.info("*********************Job logs************************")
-  logging.info(logs_for_job(PROJECT, name))
-  logging.info("*****************************************************")
-  final_job = util.wait_for_job(api_client, namespace, name,
-                                timeout=datetime.timedelta(minutes=30))
-
-  logging.info("Final job:\n%s", yaml.safe_dump(final_job.to_dict()))
-
-  logging.info("*********************Job logs************************")
-  logging.info(logs_for_job(PROJECT, name))
-  logging.info("*****************************************************")
-
-  # Download notebook html to artifacts
-  logging.info("Copying %s to bucket %s", output_gcs, prow_bucket)
-
-  storage_client = storage.Client()
-  bucket = storage_client.get_bucket(NB_BUCKET)
-  blob = bucket.get_blob(prow_path)
-
-  destination_bucket = storage_client.get_bucket(prow_bucket)
-  bucket.copy_blob(blob, destination_bucket)
-
-  if not final_job.status.conditions:
-    raise RuntimeError("Job {0}.{1}; did not complete".format(namespace, name))
-
-  last_condition = final_job.status.conditions[-1]
-
-  if last_condition.type not in ["Complete"]:
-    logging.error("Job didn't complete successfully")
-    raise RuntimeError("Job {0}.{1} failed".format(namespace, name))
--- a/py/kubeflow/examples/notebook_tests/run_notebook_test.py
+++ b/py/kubeflow/examples/notebook_tests/run_notebook_test.py
@ -1,36 +0,0 @@
-"""Runs notebook ipynb as test."""
-
-import datetime
-import logging
-import os
-import re
-import uuid
-
-import pytest
-
-from kubeflow.examples.notebook_tests import nb_test_util
-from kubeflow.testing import util
-
-def test_run_notebook(record_xml_attribute, namespace, # pylint: disable=too-many-branches,too-many-statements
-                      repos, image, notebook_path, test_target_name,
-                      artifacts_gcs):
-  notebook_name = os.path.basename(
-      notebook_path).replace(".ipynb", "").replace("_", "-")
-  junit_name = "_".join(["test", notebook_name])
-  util.set_pytest_junit(record_xml_attribute, junit_name, test_target_name)
-
-  name = "-".join([notebook_name,
-                   datetime.datetime.now().strftime("%H%M%S"),
-                   uuid.uuid4().hex[0:3]])
-
-  nb_test_util.run_papermill_job(notebook_path, name, namespace, repos, image,
-                                 artifacts_gcs, test_target_name)
-
-if __name__ == '__main__':
-  logging.basicConfig(level=logging.INFO,
-                      format=('%(levelname)s|%(asctime)s'
-                              '|%(pathname)s|%(lineno)d| %(message)s'),
-                      datefmt='%Y-%m-%dT%H:%M:%S',
-                      )
-  logging.getLogger().setLevel(logging.INFO)
-  pytest.main()