Delete the notebook tests because they are outdated. (#808)

* Delete the notebook tests because they are outdated.

* We have rewritten the test infra for notebooks to use Tekton.
  see:
    https://github.com/kubeflow/testing/blob/master/tekton/templates/pipelines/notebook-test-pipeline.yaml
    https://github.com/kubeflow/examples/tree/master/py/kubeflow/examples/notebook_tests

* We are also no longer regularly deploying the v1 clusters; we are now using
  blueprints, which is why the tests can no longer get credentials.

* * Add the mnist notebook test as a postsubmit and periodic test.

* Fix.
This commit is contained in:
Jeremy Lewi 2020-07-07 01:23:58 -07:00 committed by GitHub
parent 10b34b8dc8
commit c880fdaa80
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
8 changed files with 51 additions and 474 deletions

View File

@ -0,0 +1,47 @@
# Demo on how to write a pipeline run.
# This PipelineRun executes the mnist notebook through the shared
# "notebook-test" pipeline referenced in pipelineRef below.
apiVersion: tekton.dev/v1alpha1
kind: PipelineRun
metadata:
  generateName: mnist-
  namespace: kf-ci
  labels:
    pipeline: mnist-notebook
spec:
  params:
  # test-target-name, artifacts-gcs, and junit-path are required for
  # all the PipelineRuns. For tests spawned by Prow, values of these
  # params will be injected on the fly; the values below are only used
  # when the run is submitted manually.
  - name: test-target-name
    value: manual-testing
  - name: artifacts-gcs
    value: gs://kubeflow-ci_temp/jlewi_mnist_testing/2020-0619
  - name: junit-path
    value: artifacts/junit_manual-testing
  # Regex and location used to select the cluster the notebook runs on.
  - name: testing-cluster-pattern
    value: 'kf-vbp-.*'
  - name: testing-cluster-location
    value: 'us-central1-c'
  # GCS location for the notebook output.
  - name: notebook-output
    value: gs://kubeflow-ci-deployment_ci-temp/mnist_test
  # Path of the notebook relative to the root of the notebook-repo resource.
  - name: notebook-path
    value: mnist/mnist_gcp.ipynb
  resources:
  # The repository containing the notebook to test.
  - name: notebook-repo
    resourceSpec:
      type: git
      params:
      - name: url
        value: https://github.com/kubeflow/examples.git
      - name: revision
        value: master
  # The image we want to build
  - name: image
    resourceSpec:
      type: image
      params:
      - name: url
        value: gcr.io/kubeflow-ci-deployment/mnist-test
  pipelineRef:
    name: notebook-test
  serviceAccountName: kf-ci

View File

@ -45,31 +45,8 @@ workflows:
include_dirs:
- pytorch_mnist/*
# E2E test for various notebooks
# New notebooks can just add a step to the workflow
- py_func: kubeflow.examples.create_e2e_workflow.create_workflow
name: notebooks
- tekton_run: kubeflow/examples/tests/mnist-nb-pipeline-run.yaml
name: mnist-notebook
job_types:
- periodic
- presubmit
- postsubmit
include_dirs:
- xgboost_synthetic/*
- mnist/*
- py/kubeflow/examples/notebook_tests
- py/kubeflow/examples/create_e2e_workflow.py
# E2E test for various notebooks
# New notebooks can just add a step to the workflow
- py_func: kubeflow.examples.create_e2e_workflow.create_workflow
name: notebooks-v1
job_types:
- periodic
- presubmit
- postsubmit
include_dirs:
- xgboost_synthetic/*
- mnist/*
- py/kubeflow/examples/create_e2e_workflow.py
kwargs:
cluster_pattern: kf-v1-(?!n\d\d)
- periodic
- postsubmit

View File

@ -1,56 +0,0 @@
import pytest
def pytest_addoption(parser):
    """Register the command line options shared by the notebook tests.

    Each option is exposed to tests through a fixture of the same name
    (dashes replaced by underscores).

    Args:
      parser: The pytest option parser; only ``addoption`` is called on it.
    """
    parser.addoption(
        "--name",
        help="Name for the job. If not specified one was created "
             "automatically",
        type=str,
        default="")

    # NOTE: adjacent string literals are concatenated verbatim, so every
    # fragment that continues a sentence must carry its own separating space.
    parser.addoption(
        "--namespace",
        help=("The namespace to run in. This should correspond to "
              "a namespace associated with a Kubeflow namespace."),
        type=str,
        default="kubeflow-kf-ci-v1-user")

    parser.addoption(
        "--image",
        help="Notebook image to use",
        type=str,
        default="gcr.io/kubeflow-images-public/"
                "tensorflow-1.15.2-notebook-cpu:1.0.0")

    parser.addoption(
        "--repos",
        help="The repos to checkout; leave blank to use defaults",
        type=str,
        default="")

    parser.addoption(
        "--notebook_path",
        help=("Path to the testing notebook file, starting from "
              "the base directory of examples repository."),
        type=str,
        default="")

    parser.addoption(
        "--test-target-name",
        help=("Test target name, used as junit class name."),
        type=str,
        default="")

    parser.addoption(
        "--artifacts-gcs",
        help=("GCS to upload artifacts to."),
        type=str,
        default="")
@pytest.fixture
def name(request):
    """Provide the value passed via the ``--name`` command line option."""
    option_value = request.config.getoption("--name")
    return option_value
@pytest.fixture
def namespace(request):
    """Provide the value passed via the ``--namespace`` command line option."""
    option_value = request.config.getoption("--namespace")
    return option_value
@pytest.fixture
def image(request):
    """Provide the value passed via the ``--image`` command line option."""
    option_value = request.config.getoption("--image")
    return option_value
@pytest.fixture
def repos(request):
    """Provide the value passed via the ``--repos`` command line option."""
    option_value = request.config.getoption("--repos")
    return option_value
@pytest.fixture
def notebook_path(request):
    """Provide the value passed via the ``--notebook_path`` option."""
    option_value = request.config.getoption("--notebook_path")
    return option_value
@pytest.fixture
def test_target_name(request):
    """Provide the value passed via the ``--test-target-name`` option."""
    option_value = request.config.getoption("--test-target-name")
    return option_value
@pytest.fixture
def artifacts_gcs(request):
    """Provide the value passed via the ``--artifacts-gcs`` option."""
    option_value = request.config.getoption("--artifacts-gcs")
    return option_value

View File

@ -1,83 +0,0 @@
import argparse
import tempfile
import logging
import os
import subprocess
logger = logging.getLogger(__name__)
from google.cloud import storage
from kubeflow.testing import util
def prepare_env():
    """Install the Python packages needed to execute and render notebooks.

    Runs ``pip3 install`` once per package, in order: a pinned papermill,
    then the latest nbconvert and nbformat. Raises
    ``subprocess.CalledProcessError`` if any installation fails.
    """
    pip_arguments = (
        ["-Iv", "papermill==2.0.0"],
        ["-U", "nbconvert"],
        ["-U", "nbformat"],
    )
    for extra_args in pip_arguments:
        subprocess.check_call(["pip3", "install", *extra_args])
def execute_notebook(notebook_path, parameters=None):
    """Execute a notebook with papermill and return the executed copy's path.

    Args:
      notebook_path: Path of the notebook to run; its directory is used as
        the working directory for the execution.
      parameters: Optional parameters forwarded to papermill.

    Returns:
      Path of the executed notebook ("out.ipynb" in a fresh temp directory).
    """
    # Imported lazily; presumably because papermill is only installed at
    # runtime by prepare_env -- confirm against the caller.
    import papermill  # pylint: disable=import-error

    scratch_dir = tempfile.mkdtemp()
    executed_path = os.path.join(scratch_dir, "out.ipynb")
    working_dir = os.path.dirname(notebook_path)
    papermill.execute_notebook(
        notebook_path,
        executed_path,
        cwd=working_dir,
        parameters=parameters,
        log_output=True,
    )
    return executed_path
def _upload_notebook_html(content, target):
    """Upload an HTML rendering of a notebook to GCS.

    Args:
      content: The HTML document to upload.
      target: Destination GCS URI of the HTML object.
    """
    client = storage.Client()
    bucket_name, object_path = util.split_gcs_uri(target)
    destination_bucket = client.get_bucket(bucket_name)

    logging.info("Uploading notebook to %s.", target)
    # The content type must be text/html so that browsing the object in GCS
    # renders the page instead of downloading it.
    destination_bucket.blob(object_path).upload_from_string(
        content, content_type="text/html")
def run_notebook_test(notebook_path, parameters=None):
    """Execute a notebook, render it to HTML and upload the HTML to GCS.

    The upload destination is read from the OUTPUT_GCS environment variable.

    Args:
      notebook_path: Path of the notebook to execute.
      parameters: Optional parameters forwarded to the notebook execution.

    Raises:
      ValueError: If the OUTPUT_GCS environment variable is not set.
    """
    # Fail fast: without a destination the (potentially long) notebook run
    # would only blow up at the very end, inside the upload helper.
    gcs_path = os.getenv("OUTPUT_GCS")
    if not gcs_path:
        raise ValueError("Environment variable OUTPUT_GCS must be set to the "
                         "GCS URI where the notebook HTML should be written")

    import nbformat  # pylint: disable=import-error
    import nbconvert  # pylint: disable=import-error

    output_path = execute_notebook(notebook_path, parameters=parameters)

    # Notebooks are JSON documents; read them as UTF-8 regardless of the
    # locale of the container.
    with open(output_path, "r", encoding="utf-8") as hf:
        actual_output = hf.read()

    nb = nbformat.reads(actual_output, as_version=4)
    html_exporter = nbconvert.HTMLExporter()
    (html_output, _) = html_exporter.from_notebook_node(nb)
    _upload_notebook_html(html_output, gcs_path)
class NotebookExecutor:
    """Entry point that prepares the environment and runs a notebook test."""

    @staticmethod
    def test(notebook_path):
        """Test a notebook.

        Installs the required packages and then executes the notebook and
        uploads its HTML rendering.

        Args:
          notebook_path: Absolute path of the notebook.
        """
        prepare_env()
        run_notebook_test(notebook_path)
if __name__ == "__main__":
    # Configure logging before anything else so that installation and
    # notebook execution messages are captured.
    log_format = ('%(levelname)s|%(asctime)s'
                  '|%(message)s|%(pathname)s|%(lineno)d|')
    logging.basicConfig(level=logging.INFO,
                        format=log_format,
                        datefmt='%Y-%m-%dT%H:%M:%S')

    # fire isn't available in the notebook image which is why we aren't
    # using it.
    arg_parser = argparse.ArgumentParser()
    arg_parser.add_argument("--notebook_path",
                            default="",
                            type=str,
                            help=("Path to the notebook"))
    cli_args = arg_parser.parse_args()

    NotebookExecutor.test(cli_args.notebook_path)

View File

@ -1,55 +0,0 @@
# A batch job to run a notebook using papermill.
# The YAML is modified by nb_test_util.py to generate a Job specific
# to a notebook. run_papermill_job overrides the Job name and namespace,
# the checkout init container's command, and the main container's image,
# command, workingDir and env; the values below are only placeholders
# for those fields.
#
# TODO(jlewi): We should switch to using Tekton
apiVersion: batch/v1
kind: Job
metadata:
name: nb-test
labels:
app: nb-test
spec:
backoffLimit: 1
template:
metadata:
annotations:
# TODO(jlewi): Do we really want to disable sidecar injection
# in the test? Would it be better to use istio to mimic what happens
# in notebooks?
sidecar.istio.io/inject: "false"
labels:
app: nb-test
spec:
restartPolicy: Never
securityContext:
runAsUser: 0
initContainers:
# This init container checks out the source code into the shared
# "src" volume so the main container can read it.
# The command is replaced by nb_test_util.py with the actual repos.
- command:
- /usr/local/bin/checkout_repos.sh
- --repos=kubeflow/examples@$(CHECK_TAG)
- --src_dir=/src
name: checkout
image: gcr.io/kubeflow-ci/test-worker:v20190802-c6f9140-e3b0c4
volumeMounts:
- mountPath: /src
name: src
containers:
# The env list is replaced by nb_test_util.py (OUTPUT_GCS and PYTHONPATH).
- env:
- name: PYTHONPATH
value: /src/kubeflow/examples/py/
name: executing-notebooks
# Placeholder; nb_test_util.py sets the real notebook image.
image: execute-image
# Command will get overwritten by nb_test_util.py
command: ["python3", "-m",
"kubeflow.examples.notebook_tests.execute_notebook",
"test", "/src/kubeflow/examples/mnist/mnist_gcp.ipynb"]
# Also overwritten by nb_test_util.py with the notebook's directory.
workingDir: /src/kubeflow/examples/py/kubeflow/examples/notebook_tests
volumeMounts:
- mountPath: /src
name: src
serviceAccount: default-editor
volumes:
# Scratch volume shared by the checkout init container and the
# notebook container.
- name: src
emptyDir: {}

View File

@ -1,33 +0,0 @@
import datetime
import logging
import os
import uuid
import pytest
from kubeflow.examples.notebook_tests import nb_test_util
from kubeflow.testing import util
def test_mnist_gcp(record_xml_attribute, name, namespace,  # pylint: disable=too-many-branches,too-many-statements
                   repos, image):
    """Generate the K8s job for the MNIST GCP notebook and submit it.

    Args:
      record_xml_attribute: pytest fixture used to set the junit attributes.
      name: Name for the job; a unique "mnist-<time>-<suffix>" name is
        generated when empty.
      namespace: Namespace to run the job in.
      repos: Repos to check out; forwarded to run_papermill_job.
      image: Notebook image to run the job with.
    """
    util.set_pytest_junit(record_xml_attribute, "test_mnist")

    if not name:
        timestamp = datetime.datetime.now().strftime("%H%M%S")
        unique_suffix = uuid.uuid4().hex[0:3]
        name = "-".join(["mnist", timestamp, unique_suffix])

    util.set_pytest_junit(record_xml_attribute, "test_mnist_gcp")

    nb_test_util.run_papermill_job(
        "kubeflow/examples/mnist/mnist_gcp.ipynb",
        name, namespace, repos, image)
if __name__ == "__main__":
    # Configure INFO-level logging, then hand control to pytest.
    log_format = ('%(levelname)s|%(asctime)s'
                  '|%(pathname)s|%(lineno)d| %(message)s')
    logging.basicConfig(level=logging.INFO,
                        format=log_format,
                        datefmt='%Y-%m-%dT%H:%M:%S')
    root_logger = logging.getLogger()
    root_logger.setLevel(logging.INFO)

    pytest.main()

View File

@ -1,184 +0,0 @@
"""Some utitilies for running notebook tests."""
import datetime
import logging
import os
from urllib.parse import urlencode
import uuid
import yaml
from google.cloud import storage
from kubernetes import client as k8s_client
from kubeflow.testing import argo_build_util
from kubeflow.testing import prow_artifacts
from kubeflow.testing import util
# This is the bucket to which the batch jobs upload an HTML version of the
# notebook. The K8s job is running in a Kubeflow cluster so it needs to be
# a bucket that the Kubeflow cluster can write to.
# This is why we don't write directly to the bucket used for prow artifacts.
NB_BUCKET = "kubeflow-ci-deployment"

# GCP project used when building the Stackdriver link to the job logs.
PROJECT = "kubeflow-ci-deployment"
def logs_for_job(project, job_name):
    """Return a Stackdriver logs viewer URL for the job with the given name.

    Args:
      project: Value for the "project" query parameter of the URL.
      job_name: Name of the K8s job whose container logs should be shown.

    Returns:
      The URL of the logs viewer, filtered to the job, for the last 7 days.
    """
    filter_lines = [
        'resource.type="k8s_container"',
        f'labels."k8s-pod/job-name" = "{job_name}"',
        "",  # the filter ends with a trailing newline
    ]
    query = urlencode({
        "project": project,
        # Logs for last 7 days
        "interval": 'P7D',
        "advancedFilter": "\n".join(filter_lines),
    })
    return "https://console.cloud.google.com/logs/viewer?" + query
def run_papermill_job(notebook_path, name, namespace,  # pylint: disable=too-many-branches,too-many-statements
                      repos, image, artifacts_gcs="", test_target_name=""):
    """Generate a K8s job to run a notebook using papermill and wait for it.

    The job template is read from "job.yaml" in the current working
    directory. After the job finishes, the HTML rendering that the job
    uploaded to NB_BUCKET is copied to the prow artifacts bucket.

    Args:
      notebook_path: Path to the notebook. This should be in the form
        "{REPO_OWNER}/{REPO}/path/to/notebook.ipynb"
      name: Name for the K8s job; a unique name is generated when empty.
      namespace: The namespace where the job should run.
      repos: Which repos to checkout; if None or empty tries
        to infer based on PROW environment variables
      image: The docker image to run the notebook in.
      artifacts_gcs: Optional GCS URI; when set, artifacts are written under
        its "artifacts" sub directory instead of the location derived from
        the prow environment.
      test_target_name: Optional sub directory appended to the artifacts
        directory; only used together with artifacts_gcs.

    Raises:
      ValueError: If notebook_path is absolute or no repos could be
        determined.
      RuntimeError: If the job did not complete successfully.
    """
    # Validate the cheap precondition first so that a bad argument is
    # reported before any credentials are activated or files are read.
    if notebook_path.startswith("/"):
        raise ValueError("notebook_path={0} should not start with /".format(
            notebook_path))

    util.maybe_activate_service_account()

    with open("job.yaml") as hf:
        # safe_load: the template is plain data, and calling yaml.load without
        # an explicit Loader is deprecated (and rejected by newer PyYAML).
        job = yaml.safe_load(hf)

    # We need to checkout the correct version of the code
    # in presubmits and postsubmits, so when repos isn't given explicitly
    # we derive it from the prow environment variables.
    # See
    # https://github.com/kubernetes/test-infra/blob/45246b09ed105698aa8fb928b7736d14480def29/prow/jobs.md#job-environment-variables
    if not repos:
        repos = argo_build_util.get_repo_from_prow_env()
        logging.info("Using repos %s", repos)

    if not repos:
        raise ValueError("Could not get repos from prow environment variable "
                         "and --repos isn't explicitly set")

    repos += ",kubeflow/testing@HEAD"
    logging.info("Repos set to %s", repos)

    # Resolve the job name before it is used below to name the HTML output;
    # otherwise an empty name would produce an object called ".html".
    if not name:
        name = ("notebook-test-" +
                datetime.datetime.now().strftime("%H%M%S")
                + "-" + uuid.uuid4().hex[0:3])

    pod_spec = job["spec"]["template"]["spec"]
    pod_spec["initContainers"][0]["command"] = [
        "/usr/local/bin/checkout_repos.sh",
        "--repos=" + repos,
        "--src_dir=/src",
        "--depth=all",
    ]

    container = pod_spec["containers"][0]
    container["image"] = image

    full_notebook_path = os.path.join("/src", notebook_path)
    container["command"] = [
        "python3", "-m",
        "kubeflow.examples.notebook_tests.execute_notebook",
        "--notebook_path", full_notebook_path]
    container["workingDir"] = os.path.dirname(full_notebook_path)

    # The prow bucket to use for results/artifacts
    prow_bucket = prow_artifacts.PROW_RESULTS_BUCKET

    if artifacts_gcs:
        prow_dir = os.path.join(artifacts_gcs, "artifacts")
        if test_target_name:
            prow_dir = os.path.join(prow_dir, test_target_name)
        logging.info("Prow artifacts directory: %s", prow_dir)
        prow_bucket, prow_path = util.split_gcs_uri(prow_dir)
    elif os.getenv("REPO_OWNER") and os.getenv("REPO_NAME"):
        # Running under prow
        prow_dir = prow_artifacts.get_gcs_dir(prow_bucket)
        logging.info("Prow artifacts dir: %s", prow_dir)
        prow_dir = os.path.join(prow_dir, "artifacts")

        if os.getenv("TEST_TARGET_NAME"):
            prow_dir = os.path.join(
                prow_dir, os.getenv("TEST_TARGET_NAME").lstrip("/"))
        prow_bucket, prow_path = util.split_gcs_uri(prow_dir)
    else:
        # Not running under prow: use a unique throwaway directory.
        prow_path = "notebook-test" + datetime.datetime.now().strftime("%H%M%S")
        prow_path = prow_path + "-" + uuid.uuid4().hex[0:3]
        prow_dir = util.to_gcs_uri(prow_bucket, prow_path)
        logging.info("Prow artifacts dir: %s", prow_dir)

    # The job writes to NB_BUCKET using the same object path that the HTML
    # will later have in the prow bucket.
    prow_path = os.path.join(prow_path, name + ".html")
    output_gcs = util.to_gcs_uri(NB_BUCKET, prow_path)

    container["env"] = [
        {"name": "OUTPUT_GCS", "value": output_gcs},
        {"name": "PYTHONPATH",
         "value": "/src/kubeflow/testing/py:/src/kubeflow/examples/py"},
    ]

    logging.info("Notebook will be written to %s", output_gcs)
    util.load_kube_config(persist_config=False)

    job["metadata"]["name"] = name
    job["metadata"]["namespace"] = namespace

    # Create an API client object to talk to the K8s master.
    api_client = k8s_client.ApiClient()
    batch_api = k8s_client.BatchV1Api(api_client)

    logging.info("Creating job:\n%s", yaml.dump(job))
    actual_job = batch_api.create_namespaced_job(job["metadata"]["namespace"],
                                                 job)
    logging.info("Created job %s.%s:\n%s", namespace, name,
                 yaml.safe_dump(actual_job.to_dict()))

    logging.info("*********************Job logs************************")
    logging.info(logs_for_job(PROJECT, name))
    logging.info("*****************************************************")
    final_job = util.wait_for_job(api_client, namespace, name,
                                  timeout=datetime.timedelta(minutes=30))

    logging.info("Final job:\n%s", yaml.safe_dump(final_job.to_dict()))

    logging.info("*********************Job logs************************")
    logging.info(logs_for_job(PROJECT, name))
    logging.info("*****************************************************")

    # Download notebook html to artifacts
    logging.info("Copying %s to bucket %s", output_gcs, prow_bucket)

    storage_client = storage.Client()
    bucket = storage_client.get_bucket(NB_BUCKET)
    blob = bucket.get_blob(prow_path)

    if blob is None:
        # get_blob returns None when the object doesn't exist, e.g. because
        # the job failed before uploading. Skip the copy so that the job
        # status check below reports the real failure.
        logging.warning("Notebook output %s doesn't exist; nothing to copy",
                        output_gcs)
    else:
        destination_bucket = storage_client.get_bucket(prow_bucket)
        bucket.copy_blob(blob, destination_bucket)

    if not final_job.status.conditions:
        raise RuntimeError("Job {0}.{1}; did not complete".format(namespace, name))

    last_condition = final_job.status.conditions[-1]

    if last_condition.type not in ["Complete"]:
        logging.error("Job didn't complete successfully")
        raise RuntimeError("Job {0}.{1} failed".format(namespace, name))

View File

@ -1,36 +0,0 @@
"""Runs notebook ipynb as test."""
import datetime
import logging
import os
import re
import uuid
import pytest
from kubeflow.examples.notebook_tests import nb_test_util
from kubeflow.testing import util
def test_run_notebook(record_xml_attribute, namespace,  # pylint: disable=too-many-branches,too-many-statements
                      repos, image, notebook_path, test_target_name,
                      artifacts_gcs):
    """Run the notebook at notebook_path as a papermill K8s job.

    The junit test name and the job name are both derived from the notebook
    file name (extension removed, underscores replaced by dashes).

    Args:
      record_xml_attribute: pytest fixture used to set the junit attributes.
      namespace: Namespace to run the job in.
      repos: Repos to check out; forwarded to run_papermill_job.
      image: Notebook image to run the job with.
      notebook_path: Path to the notebook to test.
      test_target_name: Test target name, forwarded to the junit setup and
        to run_papermill_job.
      artifacts_gcs: GCS location for artifacts, forwarded to
        run_papermill_job.
    """
    file_name = os.path.basename(notebook_path)
    notebook_name = file_name.replace(".ipynb", "").replace("_", "-")

    junit_name = "test_" + notebook_name
    util.set_pytest_junit(record_xml_attribute, junit_name, test_target_name)

    timestamp = datetime.datetime.now().strftime("%H%M%S")
    unique_suffix = uuid.uuid4().hex[0:3]
    name = notebook_name + "-" + timestamp + "-" + unique_suffix

    nb_test_util.run_papermill_job(notebook_path, name, namespace, repos, image,
                                   artifacts_gcs, test_target_name)
if __name__ == '__main__':
    # Configure INFO-level logging, then hand control to pytest.
    log_format = ('%(levelname)s|%(asctime)s'
                  '|%(pathname)s|%(lineno)d| %(message)s')
    logging.basicConfig(level=logging.INFO,
                        format=log_format,
                        datefmt='%Y-%m-%dT%H:%M:%S')
    root_logger = logging.getLogger()
    root_logger.setLevel(logging.INFO)

    pytest.main()