Some improvements to utilities for testing notebooks (#803)

* Changes pulled in from kuueflow/examples#764

* Notebook tests should print a link to the stackdriver logs for
  the actual notebook job.

* Related to kubeflow/testing#613

Co-authored-by: Gabriel Wen <gabrielwen@google.com>
This commit is contained in:
Jeremy Lewi 2020-06-14 20:21:56 -07:00 committed by GitHub
parent c9a10863c3
commit 197abc9daa
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 55 additions and 10 deletions

View File

@ -20,6 +20,12 @@ def pytest_addoption(parser):
"--notebook_path", help=("Path to the testing notebook file, starting from" "--notebook_path", help=("Path to the testing notebook file, starting from"
"the base directory of examples repository."), "the base directory of examples repository."),
type=str, default="") type=str, default="")
parser.addoption(
"--test-target-name", help=("Test target name, used as junit class name."),
type=str, default="")
parser.addoption(
"--artifacts-gcs", help=("GCS to upload artifacts to."),
type=str, default="")
@pytest.fixture @pytest.fixture
def name(request): def name(request):
@ -40,3 +46,11 @@ def repos(request):
@pytest.fixture @pytest.fixture
def notebook_path(request): def notebook_path(request):
return request.config.getoption("--notebook_path") return request.config.getoption("--notebook_path")
@pytest.fixture
def test_target_name(request):
return request.config.getoption("--test-target-name")
@pytest.fixture
def artifacts_gcs(request):
return request.config.getoption("--artifacts-gcs")

View File

@ -3,8 +3,8 @@
import datetime import datetime
import logging import logging
import os import os
from urllib.parse import urlencode
import uuid import uuid
import tempfile
import yaml import yaml
from google.cloud import storage from google.cloud import storage
@ -20,8 +20,25 @@ from kubeflow.testing import util
NB_BUCKET = "kubeflow-ci-deployment" NB_BUCKET = "kubeflow-ci-deployment"
PROJECT = "kbueflow-ci-deployment" PROJECT = "kbueflow-ci-deployment"
def logs_for_job(project, job_name):
"""Get a stack driver link for the job with the specified name."""
logs_filter = f"""resource.type="k8s_container"
labels."k8s-pod/job-name" = "{job_name}"
"""
new_params = {"project": project,
# Logs for last 7 days
"interval": 'P7D',
"advancedFilter": logs_filter}
query = urlencode(new_params)
url = "https://console.cloud.google.com/logs/viewer?" + query
return url
def run_papermill_job(notebook_path, name, namespace, # pylint: disable=too-many-branches,too-many-statements def run_papermill_job(notebook_path, name, namespace, # pylint: disable=too-many-branches,too-many-statements
repos, image): repos, image, artifacts_gcs="", test_target_name=""):
"""Generate a K8s job to run a notebook using papermill """Generate a K8s job to run a notebook using papermill
Args: Args:
@ -41,7 +58,7 @@ def run_papermill_job(notebook_path, name, namespace, # pylint: disable=too-many
if notebook_path.startswith("/"): if notebook_path.startswith("/"):
raise ValueError("notebook_path={0} should not start with /".format( raise ValueError("notebook_path={0} should not start with /".format(
notebook_path)) notebook_path))
# We need to checkout the correct version of the code # We need to checkout the correct version of the code
# in presubmits and postsubmits. We should check the environment variables # in presubmits and postsubmits. We should check the environment variables
@ -51,6 +68,7 @@ def run_papermill_job(notebook_path, name, namespace, # pylint: disable=too-many
# https://github.com/kubernetes/test-infra/blob/45246b09ed105698aa8fb928b7736d14480def29/prow/jobs.md#job-environment-variables # https://github.com/kubernetes/test-infra/blob/45246b09ed105698aa8fb928b7736d14480def29/prow/jobs.md#job-environment-variables
if not repos: if not repos:
repos = argo_build_util.get_repo_from_prow_env() repos = argo_build_util.get_repo_from_prow_env()
logging.info(f"Using repos {repos}")
if not repos: if not repos:
raise ValueError("Could not get repos from prow environment variable " raise ValueError("Could not get repos from prow environment variable "
@ -75,12 +93,18 @@ def run_papermill_job(notebook_path, name, namespace, # pylint: disable=too-many
"--notebook_path", full_notebook_path] "--notebook_path", full_notebook_path]
job["spec"]["template"]["spec"]["containers"][0][ job["spec"]["template"]["spec"]["containers"][0][
"workingDir"] = os.path.dirname(full_notebook_path) "workingDir"] = os.path.dirname(full_notebook_path)
# The prow bucket to use for results/artifacts # The prow bucket to use for results/artifacts
prow_bucket = prow_artifacts.PROW_RESULTS_BUCKET prow_bucket = prow_artifacts.PROW_RESULTS_BUCKET
if os.getenv("REPO_OWNER") and os.getenv("REPO_NAME"): if artifacts_gcs:
prow_dir = os.path.join(artifacts_gcs, "artifacts")
if test_target_name:
prow_dir = os.path.join(prow_dir, test_target_name)
logging.info("Prow artifacts directory: %s", prow_dir)
prow_bucket, prow_path = util.split_gcs_uri(prow_dir)
elif os.getenv("REPO_OWNER") and os.getenv("REPO_NAME"):
# Running under prow # Running under prow
prow_dir = prow_artifacts.get_gcs_dir(prow_bucket) prow_dir = prow_artifacts.get_gcs_dir(prow_bucket)
logging.info("Prow artifacts dir: %s", prow_dir) logging.info("Prow artifacts dir: %s", prow_dir)
@ -128,11 +152,18 @@ def run_papermill_job(notebook_path, name, namespace, # pylint: disable=too-many
logging.info("Created job %s.%s:\n%s", namespace, name, logging.info("Created job %s.%s:\n%s", namespace, name,
yaml.safe_dump(actual_job.to_dict())) yaml.safe_dump(actual_job.to_dict()))
logging.info("*********************Job logs************************")
logging.info(logs_for_job(PROJECT, name))
logging.info("*****************************************************")
final_job = util.wait_for_job(api_client, namespace, name, final_job = util.wait_for_job(api_client, namespace, name,
timeout=datetime.timedelta(minutes=30)) timeout=datetime.timedelta(minutes=30))
logging.info("Final job:\n%s", yaml.safe_dump(final_job.to_dict())) logging.info("Final job:\n%s", yaml.safe_dump(final_job.to_dict()))
logging.info("*********************Job logs************************")
logging.info(logs_for_job(PROJECT, name))
logging.info("*****************************************************")
# Download notebook html to artifacts # Download notebook html to artifacts
logging.info("Copying %s to bucket %s", output_gcs, prow_bucket) logging.info("Copying %s to bucket %s", output_gcs, prow_bucket)
@ -151,4 +182,3 @@ def run_papermill_job(notebook_path, name, namespace, # pylint: disable=too-many
if last_condition.type not in ["Complete"]: if last_condition.type not in ["Complete"]:
logging.error("Job didn't complete successfully") logging.error("Job didn't complete successfully")
raise RuntimeError("Job {0}.{1} failed".format(namespace, name)) raise RuntimeError("Job {0}.{1} failed".format(namespace, name))

View File

@ -12,18 +12,19 @@ from kubeflow.examples.notebook_tests import nb_test_util
from kubeflow.testing import util from kubeflow.testing import util
def test_run_notebook(record_xml_attribute, namespace, # pylint: disable=too-many-branches,too-many-statements def test_run_notebook(record_xml_attribute, namespace, # pylint: disable=too-many-branches,too-many-statements
repos, image, notebook_path): repos, image, notebook_path, test_target_name,
artifacts_gcs):
notebook_name = os.path.basename( notebook_name = os.path.basename(
notebook_path).replace(".ipynb", "").replace("_", "-") notebook_path).replace(".ipynb", "").replace("_", "-")
junit_name = "_".join(["test", notebook_name]) junit_name = "_".join(["test", notebook_name])
util.set_pytest_junit(record_xml_attribute, junit_name) util.set_pytest_junit(record_xml_attribute, junit_name, test_target_name)
name = "-".join([notebook_name, name = "-".join([notebook_name,
datetime.datetime.now().strftime("%H%M%S"), datetime.datetime.now().strftime("%H%M%S"),
uuid.uuid4().hex[0:3]]) uuid.uuid4().hex[0:3]])
util.set_pytest_junit(record_xml_attribute, junit_name) nb_test_util.run_papermill_job(notebook_path, name, namespace, repos, image,
nb_test_util.run_papermill_job(notebook_path, name, namespace, repos, image) artifacts_gcs, test_target_name)
if __name__ == '__main__': if __name__ == '__main__':
logging.basicConfig(level=logging.INFO, logging.basicConfig(level=logging.INFO,