mirror of https://github.com/kubeflow/examples.git
Update xgboost_synthetic test infra; preliminary updates to work with 0.7.0 (#666)
* Update xgboost_synthetic test infra to use pytest and pyfunc.
* Related to #655: update xgboost_synthetic to use workload identity.
* Related to #665: no signal about xgboost_synthetic.
* We need to update the xgboost_synthetic example to work with 0.7.0, e.g. workload identity.
* This PR focuses on updating the test infra and some preliminary updates to the notebook.
* More fixes to the test and the notebook are probably needed in order to get it to actually pass.
* Update the job spec for 0.7; remove the secret and set the default service account.
* This is to make it work with workload identity.
* Instead of using kustomize to define the job to run the notebook, we can just modify the YAML spec using python.
* Use the python API for K8s to create the job rather than shelling out (sketched below, after the commit metadata).
* The notebook should do a 0.7-compatible check for credentials.
* We don't want to assume GOOGLE_APPLICATION_CREDENTIALS is set because we will be using workload identity.
* Take in repos as an argument akin to what checkout_repos.sh requires.
* Convert xgboost_test.py to a pytest.
* This allows us to mark it as expected to fail so we can start to get signal without blocking.
* We also need to emit junit files to show up in test grid.
* Convert the jsonnet workflow for the E2E test to a python function that defines the workflow.
* Remove the old jsonnet workflow.
* Address comments.
* Fix issues with the notebook.
* Install pip packages in user space.
* 0.7.0 images are based on TF images and they have different permissions.
* Install a newer version of the fairing SDK that works with workload identity.
* Split pip installing dependencies out of util.py and into notebook_setup.py.
* That's because util.py could depend on the packages being installed by notebook_setup.py.
* After pip installing the modules into user space, we need to add the local pip-package path to the python path, otherwise we get import-not-found errors.
This commit is contained in:
parent 6b37a40293
commit 7e28cd6b23
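The switch away from kustomize amounts to loading the notebook-runner Job YAML, editing the spec as a plain dict, and submitting it with the Kubernetes python client instead of shelling out to kubectl. The sketch below is illustrative only; the file name, namespace, and service account name are placeholder assumptions, not the values used by the actual test infra.

import yaml
from kubernetes import client, config


def launch_notebook_job(spec_path="notebook_job.yaml",    # hypothetical spec file
                        namespace="kubeflow-test-infra"):  # placeholder namespace
    """Patch a Job spec for workload identity and create it via the K8s API."""
    with open(spec_path) as f:
        job = yaml.safe_load(f)

    pod_spec = job["spec"]["template"]["spec"]
    # With workload identity we rely on the namespace's default service account
    # instead of mounting a GCP credentials secret, so drop any secret volumes.
    pod_spec["serviceAccountName"] = "default-editor"  # assumed account name
    pod_spec.pop("volumes", None)

    config.load_kube_config()
    return client.BatchV1Api().create_namespaced_job(namespace, job)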
@ -56,7 +56,7 @@ confidence=
|
|||
# --enable=similarities". If you want to run only the classes checker, but have
|
||||
# no Warning level messages displayed, use"--disable=all --enable=classes
|
||||
# --disable=W"
|
||||
disable=import-star-module-level,old-octal-literal,oct-method,print-statement,unpacking-in-except,parameter-unpacking,backtick,old-raise-syntax,old-ne-operator,long-suffix,dict-view-method,dict-iter-method,metaclass-assignment,next-method-called,raising-string,indexing-exception,raw_input-builtin,long-builtin,file-builtin,execfile-builtin,coerce-builtin,cmp-builtin,buffer-builtin,basestring-builtin,apply-builtin,filter-builtin-not-iterating,using-cmp-argument,useless-suppression,range-builtin-not-iterating,suppressed-message,missing-docstring,no-absolute-import,old-division,cmp-method,reload-builtin,zip-builtin-not-iterating,intern-builtin,unichr-builtin,reduce-builtin,standarderror-builtin,unicode-builtin,xrange-builtin,coerce-method,delslice-method,getslice-method,setslice-method,input-builtin,round-builtin,hex-method,nonzero-method,map-builtin-not-iterating,relative-import,invalid-name,bad-continuation,no-member,locally-disabled,fixme,import-error,too-many-locals
|
||||
disable=import-star-module-level,old-octal-literal,oct-method,print-statement,unpacking-in-except,parameter-unpacking,backtick,old-raise-syntax,old-ne-operator,long-suffix,dict-view-method,dict-iter-method,metaclass-assignment,next-method-called,raising-string,indexing-exception,raw_input-builtin,long-builtin,file-builtin,execfile-builtin,coerce-builtin,cmp-builtin,buffer-builtin,basestring-builtin,apply-builtin,filter-builtin-not-iterating,using-cmp-argument,useless-suppression,range-builtin-not-iterating,suppressed-message,missing-docstring,no-absolute-import,old-division,cmp-method,reload-builtin,zip-builtin-not-iterating,intern-builtin,unichr-builtin,reduce-builtin,standarderror-builtin,unicode-builtin,xrange-builtin,coerce-method,delslice-method,getslice-method,setslice-method,input-builtin,round-builtin,hex-method,nonzero-method,map-builtin-not-iterating,relative-import,invalid-name,bad-continuation,no-member,locally-disabled,fixme,import-error,too-many-locals,no-name-in-module,too-many-instance-attributes,no-self-use
|
||||
|
||||
|
||||
[REPORTS]
|
||||
|
|
|
@ -1,5 +1,7 @@
|
|||
# This file configures the workflows to trigger in our Prow jobs.
|
||||
# see kubeflow/testing/py/run_e2e_workflow.py
|
||||
python_paths:
|
||||
- kubeflow/examples/py
|
||||
workflows:
|
||||
- app_dir: kubeflow/examples/test/workflows
|
||||
component: workflows
|
||||
|
@ -62,10 +64,10 @@ workflows:
|
|||
include_dirs:
|
||||
- pytorch_mnist/*
|
||||
|
||||
# E2E test for xgboost-synthetic
|
||||
- app_dir: kubeflow/examples/test/workflows
|
||||
component: xgboost_synthetic
|
||||
name: xgboost2
|
||||
# E2E test for various notebooks
|
||||
# New notebooks can just add a step to the workflow
|
||||
- py_func: kubeflow.examples.create_e2e_workflow.create_workflow
|
||||
name: notebooks
|
||||
job_types:
|
||||
- periodic
|
||||
- presubmit
|
||||
|
|
|
@ -0,0 +1,6 @@
|
|||
# Internal code for testing of examples
|
||||
|
||||
This directory contains some python utilities reused for testing across
|
||||
examples.
|
||||
|
||||
No actual examples are in this directory.
|
|
@ -0,0 +1 @@
|
|||
__path__ = __import__('pkgutil').extend_path(__path__, __name__)
|
|
@ -0,0 +1,389 @@
|
|||
""""Define the E2E workflow for kubeflow examples.
|
||||
|
||||
Rapid iteration.
|
||||
|
||||
Here are some pointers for rapidly iterating on the workflow during development.
|
||||
|
||||
1. You can use the e2e_tool.py to directly launch the workflow on a K8s cluster.
|
||||
If you don't have CLI access to the kubeflow-ci cluster (most folks) then
|
||||
you would need to set up your own test cluster.
|
||||
|
||||
2. Running with the E2E tool.
|
||||
|
||||
export PYTHONPATH=${PYTHONPATH}:${KUBEFLOW_EXAMPLES}/py:${KUBEFLOW_TESTING_REPO}/py
|
||||
|
||||
python -m kubeflow.testing.e2e_tool apply \
|
||||
kubeflow.examples.create_e2e_workflow.create_workflow \
|
||||
--name=${USER}-kfctl-test-$(date +%Y%m%d-%H%M%S) \
|
||||
--namespace=kubeflow-test-infra \
|
||||
--open-in-chrome=true
|
||||
|
||||
To use code from a pull request, set the prow environment variables; e.g.
|
||||
|
||||
export JOB_NAME="jlewi-test"
|
||||
export JOB_TYPE="presubmit"
|
||||
export BUILD_ID=1234
|
||||
export PROW_JOB_ID=1234
|
||||
export REPO_OWNER=kubeflow
|
||||
export REPO_NAME=kubeflow
|
||||
export PULL_NUMBER=4148
|
||||
"""
|
||||
|
||||
import logging
|
||||
import os
|
||||
|
||||
from kubeflow.testing import argo_build_util
|
||||
|
||||
# The name of the NFS volume claim to use for test files.
|
||||
NFS_VOLUME_CLAIM = "nfs-external"
|
||||
# The name to use for the volume containing test data
|
||||
DATA_VOLUME = "kubeflow-test-volume"
|
||||
|
||||
# This is the main dag with the entrypoint
|
||||
E2E_DAG_NAME = "e2e"
|
||||
EXIT_DAG_NAME = "exit-handler"
|
||||
|
||||
# This is a sub dag containing the suite of tests to run against
|
||||
# a Kubeflow deployment
|
||||
TESTS_DAG_NAME = "gke-tests"
|
||||
|
||||
TEMPLATE_LABEL = "examples_e2e"
|
||||
|
||||
MAIN_REPO = "kubeflow/examples"
|
||||
|
||||
EXTRA_REPOS = ["kubeflow/testing@HEAD"]
|
||||
|
||||
class Builder:
|
||||
def __init__(self, name=None, namespace=None, test_target_name=None,
|
||||
bucket=None,
|
||||
**kwargs): # pylint: disable=unused-argument
|
||||
"""Initialize a builder.
|
||||
|
||||
Args:
|
||||
name: Name for the workflow.
|
||||
namespace: Namespace for the workflow.
|
||||
test_target_name: (Optional) Name to use as the test target to group
|
||||
tests.
|
||||
"""
|
||||
self.name = name
|
||||
self.namespace = namespace
|
||||
# ****************************************************************************
|
||||
# Define directory locations
|
||||
# ****************************************************************************
|
||||
# mount_path is the directory where the volume to store the test data
|
||||
# should be mounted.
|
||||
self.mount_path = "/mnt/" + "test-data-volume"
|
||||
# test_dir is the root directory for all data for a particular test run.
|
||||
self.test_dir = self.mount_path + "/" + self.name
|
||||
# output_dir is the directory to sync to GCS to contain the output for this
|
||||
# job.
|
||||
self.output_dir = self.test_dir + "/output"
|
||||
|
||||
# We prefix the artifacts directory with junit because
|
||||
# that's what spyglass/prow requires. This ensures multiple
|
||||
# instances of a workflow triggered by the same prow job
|
||||
# don't end up clobbering each other
|
||||
self.artifacts_dir = self.output_dir + "/artifacts/junit_{0}".format(name)
|
||||
|
||||
# source directory where all repos should be checked out
|
||||
self.src_root_dir = self.test_dir + "/src"
|
||||
# The directory containing the kubeflow/examples repo
|
||||
self.src_dir = self.src_root_dir + "/kubeflow/examples"
|
||||
|
||||
# Top level directories for python code
|
||||
self.kubeflow_py = self.src_dir
|
||||
|
||||
# The directory within the kubeflow_testing submodule containing
|
||||
# py scripts to use.
|
||||
self.kubeflow_testing_py = self.src_root_dir + "/kubeflow/testing/py"
|
||||
|
||||
# The class name to label junit files.
|
||||
# We want to be able to group related tests in test grid.
|
||||
# Test grid allows grouping by target which corresponds to the classname
|
||||
# attribute in junit files.
|
||||
# So we set an environment variable to the desired class name.
|
||||
# The pytest modules can then look at this environment variable to
|
||||
# explicitly override the classname.
|
||||
# The classname should be unique for each run so it should take into
|
||||
# account the different parameters
|
||||
self.test_target_name = test_target_name
|
||||
|
||||
self.bucket = bucket
|
||||
self.workflow = None
|
||||
|
||||
def _build_workflow(self):
|
||||
"""Create the scaffolding for the Argo workflow"""
|
||||
workflow = {
|
||||
"apiVersion": "argoproj.io/v1alpha1",
|
||||
"kind": "Workflow",
|
||||
"metadata": {
|
||||
"name": self.name,
|
||||
"namespace": self.namespace,
|
||||
"labels": argo_build_util.add_dicts([{
|
||||
"workflow": self.name,
|
||||
"workflow_template": TEMPLATE_LABEL,
|
||||
}, argo_build_util.get_prow_labels()]),
|
||||
},
|
||||
"spec": {
|
||||
"entrypoint": E2E_DAG_NAME,
|
||||
# Have argo garbage collect old workflows otherwise we overload the API
|
||||
# server.
|
||||
"ttlSecondsAfterFinished": 7 * 24 * 60 * 60,
|
||||
"volumes": [
|
||||
{
|
||||
"name": "gcp-credentials",
|
||||
"secret": {
|
||||
"secretName": "kubeflow-testing-credentials",
|
||||
},
|
||||
},
|
||||
{
|
||||
"name": DATA_VOLUME,
|
||||
"persistentVolumeClaim": {
|
||||
"claimName": NFS_VOLUME_CLAIM,
|
||||
},
|
||||
},
|
||||
],
|
||||
"onExit": EXIT_DAG_NAME,
|
||||
"templates": [
|
||||
{
|
||||
"dag": {
|
||||
"tasks": [],
|
||||
},
|
||||
"name": E2E_DAG_NAME,
|
||||
},
|
||||
{
|
||||
"dag": {
|
||||
"tasks": [],
|
||||
},
|
||||
"name": TESTS_DAG_NAME,
|
||||
|
||||
},
|
||||
{
|
||||
"dag": {
|
||||
"tasks": [],
|
||||
},
|
||||
"name": EXIT_DAG_NAME,
|
||||
}
|
||||
],
|
||||
}, # spec
|
||||
} # workflow
|
||||
|
||||
return workflow
|
||||
|
||||
def _build_task_template(self):
|
||||
"""Return a template for all the tasks"""
|
||||
|
||||
task_template = {'activeDeadlineSeconds': 3000,
|
||||
'container': {'command': [],
|
||||
'env': [
|
||||
{"name": "GOOGLE_APPLICATION_CREDENTIALS",
|
||||
"value": "/secret/gcp-credentials/key.json"},
|
||||
{"name": "TEST_TARGET_NAME",
|
||||
"value": self.test_target_name},
|
||||
],
|
||||
'image': 'gcr.io/kubeflow-ci/test-worker:latest',
|
||||
'imagePullPolicy': 'Always',
|
||||
'name': '',
|
||||
'resources': {'limits': {'cpu': '4', 'memory': '4Gi'},
|
||||
'requests': {'cpu': '1', 'memory': '1536Mi'}},
|
||||
'volumeMounts': [{'mountPath': '/mnt/test-data-volume',
|
||||
'name': 'kubeflow-test-volume'},
|
||||
{'mountPath':
|
||||
'/secret/gcp-credentials',
|
||||
'name': 'gcp-credentials'}]},
|
||||
'metadata': {'labels': {
|
||||
'workflow_template': TEMPLATE_LABEL}},
|
||||
'outputs': {}}
|
||||
|
||||
# Define common environment variables to be added to all steps
|
||||
common_env = [
|
||||
{'name': 'PYTHONPATH',
|
||||
'value': ":".join([self.kubeflow_py, self.kubeflow_py + "/py",
|
||||
self.kubeflow_testing_py,])},
|
||||
{'name': 'KUBECONFIG',
|
||||
'value': os.path.join(self.test_dir, 'kfctl_test/.kube/kubeconfig')},
|
||||
]
|
||||
|
||||
task_template["container"]["env"].extend(common_env)
|
||||
|
||||
task_template = argo_build_util.add_prow_env(task_template)
|
||||
|
||||
return task_template
|
||||
|
||||
def _build_step(self, name, workflow, dag_name, task_template,
|
||||
command, dependencies):
|
||||
"""Syntactic sugar to add a step to the workflow"""
|
||||
|
||||
step = argo_build_util.deep_copy(task_template)
|
||||
|
||||
step["name"] = name
|
||||
step["container"]["command"] = command
|
||||
|
||||
argo_build_util.add_task_to_dag(workflow, dag_name, step, dependencies)
|
||||
|
||||
# Return the newly created template; add_task_to_dag makes a copy of the template,
# so we need to fetch it from the workflow spec.
|
||||
for t in workflow["spec"]["templates"]:
|
||||
if t["name"] == name:
|
||||
return t
|
||||
|
||||
return None
|
||||
|
||||
def _build_tests_dag(self):
|
||||
"""Build the dag for the set of tests to run against a KF deployment."""
|
||||
|
||||
task_template = self._build_task_template()
|
||||
|
||||
# ***************************************************************************
|
||||
# Test xgboost
|
||||
step_name = "xgboost-synthetic"
|
||||
command = ["pytest", "xgboost_test.py",
|
||||
# I think -s means stdout/stderr will print out to aid in debugging.
|
||||
# Failures still appear to be captured and stored in the junit file.
|
||||
"-s",
|
||||
# Increase the log level so that info level log statements show up.
|
||||
"--log-cli-level=info",
|
||||
# Test timeout in seconds.
|
||||
"--timeout=1800",
|
||||
"--junitxml=" + self.artifacts_dir + "/junit_xgboost-synthetic-test.xml",
|
||||
]
|
||||
|
||||
dependencies = []
|
||||
xgboost_step = self._build_step(step_name, self.workflow, TESTS_DAG_NAME, task_template,
|
||||
command, dependencies)
|
||||
xgboost_step["container"]["workingDir"] = os.path.join(self.src_dir,
|
||||
"xgboost_synthetic",
|
||||
"testing")
|
||||
|
||||
|
||||
def _build_exit_dag(self):
|
||||
"""Build the exit handler dag"""
|
||||
task_template = self._build_task_template()
|
||||
|
||||
# ***********************************************************************
|
||||
# Copy artifacts
|
||||
step_name = "copy-artifacts"
|
||||
command = ["python",
|
||||
"-m",
|
||||
"kubeflow.testing.prow_artifacts",
|
||||
"--artifacts_dir=" +
|
||||
self.output_dir,
|
||||
"copy_artifacts"]
|
||||
|
||||
if self.bucket:
|
||||
command.append("--bucket=" + self.bucket)
|
||||
|
||||
dependencies = []
|
||||
|
||||
copy_artifacts = self._build_step(step_name, self.workflow, EXIT_DAG_NAME, task_template,
|
||||
command, dependencies)
|
||||
|
||||
# TODO(jlewi): We may need to run this with retries. kubeflow/kubeflow
|
||||
# has a python script run with retries; we might want to move that
|
||||
# over to kubeflow.testing and use it.
|
||||
step_name = "test-dir-delete"
|
||||
command = ["rm",
|
||||
"-rf",
|
||||
self.test_dir, ]
|
||||
dependencies = [copy_artifacts["name"]]
|
||||
copy_artifacts = self._build_step(step_name, self.workflow, EXIT_DAG_NAME, task_template,
|
||||
command, dependencies)
|
||||
|
||||
# We don't want to run from the directory we are trying to delete.
|
||||
copy_artifacts["container"]["workingDir"] = "/"
|
||||
|
||||
def build(self):
|
||||
self.workflow = self._build_workflow()
|
||||
task_template = self._build_task_template()
|
||||
|
||||
# **************************************************************************
|
||||
# Checkout
|
||||
|
||||
# create the checkout step
|
||||
main_repo = argo_build_util.get_repo_from_prow_env()
|
||||
if not main_repo:
|
||||
logging.info("Prow environment variables for repo not set")
|
||||
main_repo = MAIN_REPO + "@HEAD"
|
||||
logging.info("Main repository: %s", main_repo)
|
||||
repos = [main_repo]
|
||||
|
||||
repos.extend(EXTRA_REPOS)
|
||||
|
||||
#***************************************************************************
|
||||
# Checkout the code
|
||||
checkout = argo_build_util.deep_copy(task_template)
|
||||
|
||||
checkout["name"] = "checkout"
|
||||
checkout["container"]["command"] = ["/usr/local/bin/checkout_repos.sh",
|
||||
"--repos=" + ",".join(repos),
|
||||
"--src_dir=" + self.src_root_dir]
|
||||
|
||||
argo_build_util.add_task_to_dag(self.workflow, E2E_DAG_NAME, checkout, [])
|
||||
|
||||
#***************************************************************************
|
||||
# Get credentials for the latest auto-deployed cluster
|
||||
|
||||
credentials = argo_build_util.deep_copy(task_template)
|
||||
|
||||
credentials["name"] = "get-credentials"
|
||||
credentials["container"]["command"] = ["python3",
|
||||
"-m",
|
||||
"kubeflow.testing."
|
||||
"get_kf_testing_cluster",
|
||||
"get-credentials",
|
||||
]
|
||||
|
||||
dependencies = [checkout["name"]]
|
||||
argo_build_util.add_task_to_dag(self.workflow, E2E_DAG_NAME, credentials,
|
||||
dependencies)
|
||||
|
||||
#**************************************************************************
|
||||
# Run a dag of tests
|
||||
self._build_tests_dag()
|
||||
|
||||
# Add a task to run the dag
|
||||
dependencies = [credentials["name"]]
|
||||
argo_build_util.add_task_only_to_dag(self.workflow, E2E_DAG_NAME,
|
||||
TESTS_DAG_NAME,
|
||||
TESTS_DAG_NAME,
|
||||
dependencies)
|
||||
|
||||
# **************************************************************************
|
||||
# create_pr_symlink
|
||||
# ***************************************************************************
|
||||
# TODO(jlewi): run_e2e_workflow.py should probably create the PR symlink
|
||||
step_name = "create-pr-symlink"
|
||||
command = ["python",
|
||||
"-m",
|
||||
"kubeflow.testing.prow_artifacts",
|
||||
"--artifacts_dir=" + self.output_dir,
|
||||
"create_pr_symlink"]
|
||||
|
||||
if self.bucket:
|
||||
command.append(self.bucket)
|
||||
|
||||
dependencies = [checkout["name"]]
|
||||
self._build_step(step_name, self.workflow, E2E_DAG_NAME, task_template,
|
||||
command, dependencies)
|
||||
|
||||
self._build_exit_dag()
|
||||
|
||||
# Set the labels on all templates
|
||||
self.workflow = argo_build_util.set_task_template_labels(self.workflow)
|
||||
|
||||
return self.workflow
|
||||
|
||||
# TODO(jlewi): This is an unnecessary layer of indirection around the builder
|
||||
# We should allow py_func in prow_config to point to the builder and
|
||||
# let e2e_tool take care of this.
|
||||
def create_workflow(**kwargs): # pylint: disable=too-many-statements
|
||||
"""Create workflow returns an Argo workflow to test kfctl upgrades.
|
||||
|
||||
Args:
|
||||
name: Name to give to the workflow. This can also be used to name things
|
||||
associated with the workflow.
|
||||
"""
|
||||
|
||||
builder = Builder(**kwargs)
|
||||
|
||||
return builder.build()
|
|
@ -1,439 +0,0 @@
|
|||
// Test workflow for XGBoost Housing example.
|
||||
//
|
||||
local env = std.extVar("__ksonnet/environments");
|
||||
local overrides = std.extVar("__ksonnet/params").components.xgboost_synthetic;
|
||||
|
||||
local k = import "k.libsonnet";
|
||||
local util = import "util.libsonnet";
|
||||
|
||||
// Define default params and then combine them with any overrides
|
||||
local defaultParams = {
|
||||
// local nfsVolumeClaim: "kubeflow-testing",
|
||||
nfsVolumeClaim: "nfs-external",
|
||||
|
||||
// The name to use for the volume to use to contain test data.
|
||||
dataVolume: "kubeflow-test-volume",
|
||||
|
||||
// Default step image:
|
||||
stepImage: "gcr.io/kubeflow-ci/test-worker:v20190802-c6f9140-e3b0c4",
|
||||
|
||||
// Which Kubeflow cluster to use for running PytorchJobs on.
|
||||
kfProject: "kubeflow-ci-deployment",
|
||||
kfZone: "us-east1-b",
|
||||
kfCluster: "kf-vmaster-n00",
|
||||
|
||||
// The bucket where the model should be written
|
||||
// This needs to be writable by the GCP service account in the Kubeflow cluster (not the test cluster)
|
||||
modelBucket: "kubeflow-ci_temp",
|
||||
|
||||
// Whether to delete the namespace at the end.
|
||||
// Leaving the namespace around can be useful for debugging.
|
||||
//
|
||||
// TODO(jlewi): We should consider running a cronjob to GC namespaces.
|
||||
// But if we leave namespaces up; then we end up leaving the servers up which
|
||||
// uses up CPU.
|
||||
//
|
||||
deleteNamespace: true,
|
||||
};
|
||||
|
||||
local params = defaultParams + overrides;
|
||||
|
||||
local prowEnv = util.parseEnv(params.prow_env);
|
||||
|
||||
// Create a dictionary of the different prow variables so we can refer to them in the workflow.
|
||||
//
|
||||
// Important: We want to initialize all variables we reference to some value. If we don't
|
||||
// and we reference a variable which doesn't get set then we get very hard to debug failure messages.
|
||||
// In particular, we've seen problems where if we add a new environment and evaluate one component eg. "workflows"
|
||||
// and another component e.g "code_search.jsonnet" doesn't have a default value for BUILD_ID then ksonnet
|
||||
// fails because BUILD_ID is undefined.
|
||||
local prowDict = {
|
||||
BUILD_ID: "notset",
|
||||
BUILD_NUMBER: "notset",
|
||||
REPO_OWNER: "notset",
|
||||
REPO_NAME: "notset",
|
||||
JOB_NAME: "notset",
|
||||
JOB_TYPE: "notset",
|
||||
PULL_NUMBER: "notset",
|
||||
PULL_BASE_SHA: "notset",
|
||||
} + util.listOfDictToMap(prowEnv);
|
||||
|
||||
local bucket = params.bucket;
|
||||
|
||||
// mountPath is the directory where the volume to store the test data
|
||||
// should be mounted.
|
||||
local mountPath = "/mnt/" + "test-data-volume";
|
||||
// testDir is the root directory for all data for a particular test run.
|
||||
local testDir = mountPath + "/" + params.name;
|
||||
// outputDir is the directory to sync to GCS to contain the output for this job.
|
||||
local outputDir = testDir + "/output";
|
||||
local artifactsDir = outputDir + "/artifacts";
|
||||
|
||||
// Source directory where all repos should be checked out
|
||||
local srcRootDir = testDir + "/src";
|
||||
|
||||
// The directory containing the kubeflow/kubeflow repo
|
||||
local srcDir = srcRootDir + "/" + prowDict.REPO_OWNER + "/" + prowDict.REPO_NAME;
|
||||
|
||||
// These variables control where the docker images get pushed and what
|
||||
// tag to use
|
||||
local executeImage = "gcr.io/kubeflow-images-public/tensorflow-1.13.1-notebook-cpu:v0.5.0";
|
||||
|
||||
// value of KUBECONFIG environment variable. This should be a full path.
|
||||
local kubeConfig = testDir + "/.kube/kubeconfig";
|
||||
|
||||
// Namespace where tests should run
|
||||
local testNamespace = "xgboost-synthetic-" + prowDict["BUILD_ID"];
|
||||
|
||||
// The directory within the kubeflow_testing submodule containing
|
||||
// py scripts to use.
|
||||
local kubeflowTestingPy = srcRootDir + "/kubeflow/testing/py";
|
||||
|
||||
// Workflow template is the name of the workflow template; typically the name of the ks component.
|
||||
// This is used as a label to make it easy to identify all Argo workflows created from a given
|
||||
// template.
|
||||
local workflow_template = "xgboost_synthetic";
|
||||
|
||||
// Build template is a template for constructing Argo step templates.
|
||||
//
|
||||
// step_name: Name for the template
|
||||
// command: List to pass as the container command.
|
||||
//
|
||||
// We customize the defaults for each step in the workflow by modifying
|
||||
// buildTemplate.argoTemplate
|
||||
local buildTemplate = {
|
||||
// name & command variables should be overwritten for every test.
|
||||
// Other variables can be changed per step as needed.
|
||||
// They are hidden because they shouldn't be included in the Argo template
|
||||
name: "",
|
||||
command:: "",
|
||||
image: params.stepImage,
|
||||
workingDir:: null,
|
||||
env_vars:: [],
|
||||
side_cars: [],
|
||||
pythonPath: kubeflowTestingPy,
|
||||
|
||||
activeDeadlineSeconds: 1800, // Set 30 minute timeout for each template
|
||||
|
||||
local template = self,
|
||||
|
||||
// Actual template for Argo
|
||||
argoTemplate: {
|
||||
name: template.name,
|
||||
metadata: {
|
||||
labels: prowDict + {
|
||||
workflow: params.name,
|
||||
workflow_template: workflow_template,
|
||||
step_name: template.name,
|
||||
},
|
||||
},
|
||||
container: {
|
||||
command: template.command,
|
||||
name: template.name,
|
||||
image: template.image,
|
||||
workingDir: template.workingDir,
|
||||
env: [
|
||||
{
|
||||
// Add the source directories to the python path.
|
||||
name: "PYTHONPATH",
|
||||
value: template.pythonPath,
|
||||
},
|
||||
{
|
||||
name: "GOOGLE_APPLICATION_CREDENTIALS",
|
||||
value: "/secret/gcp-credentials/key.json",
|
||||
},
|
||||
{
|
||||
name: "GITHUB_TOKEN",
|
||||
valueFrom: {
|
||||
secretKeyRef: {
|
||||
name: "github-token",
|
||||
key: "github_token",
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
// We use a directory in our NFS share to store our kube config.
|
||||
// This way we can configure it on a single step and reuse it on subsequent steps.
|
||||
name: "KUBECONFIG",
|
||||
value: kubeConfig,
|
||||
},
|
||||
] + prowEnv + template.env_vars,
|
||||
volumeMounts: [
|
||||
{
|
||||
name: params.dataVolume,
|
||||
mountPath: mountPath,
|
||||
},
|
||||
{
|
||||
name: "github-token",
|
||||
mountPath: "/secret/github-token",
|
||||
},
|
||||
{
|
||||
name: "gcp-credentials",
|
||||
mountPath: "/secret/gcp-credentials",
|
||||
},
|
||||
],
|
||||
},
|
||||
},
|
||||
}; // buildTemplate
|
||||
|
||||
|
||||
// Create a list of dictionary.
|
||||
// Each item is a dictionary describing one step in the graph.
|
||||
local dagTemplates = [
|
||||
{
|
||||
template: buildTemplate {
|
||||
name: "checkout",
|
||||
command:
|
||||
["/usr/local/bin/checkout.sh", srcRootDir],
|
||||
|
||||
env_vars: [{
|
||||
name: "EXTRA_REPOS",
|
||||
// TODO(jlewi): Pin to commit on master when #281 is checked in.
|
||||
value: "kubeflow/testing@HEAD:281",
|
||||
}],
|
||||
},
|
||||
dependencies: null,
|
||||
}, // checkout
|
||||
{
|
||||
// TODO(https://github.com/kubeflow/testing/issues/257): Create-pr-symlink
|
||||
// should be done by run_e2e_workflow.py
|
||||
template: buildTemplate {
|
||||
name: "create-pr-symlink",
|
||||
command: [
|
||||
"python",
|
||||
"-m",
|
||||
"kubeflow.testing.prow_artifacts",
|
||||
"--artifacts_dir=" + outputDir,
|
||||
"create_pr_symlink",
|
||||
"--bucket=" + params.bucket,
|
||||
],
|
||||
}, // create-pr-symlink
|
||||
dependencies: ["checkout"],
|
||||
}, // create-pr-symlink
|
||||
{
|
||||
// Configure KUBECONFIG
|
||||
template: buildTemplate {
|
||||
name: "get-kubeconfig",
|
||||
command: util.buildCommand([
|
||||
[
|
||||
"gcloud",
|
||||
"auth",
|
||||
"activate-service-account",
|
||||
"--key-file=${GOOGLE_APPLICATION_CREDENTIALS}",
|
||||
],
|
||||
[
|
||||
"gcloud",
|
||||
"--project=" + params.kfProject,
|
||||
"container",
|
||||
"clusters",
|
||||
"get-credentials",
|
||||
"--zone=" + params.kfZone,
|
||||
params.kfCluster,
|
||||
]]
|
||||
),
|
||||
},
|
||||
dependencies: ["checkout"],
|
||||
}, // get-kubeconfig
|
||||
{
|
||||
// Create the namespace
|
||||
// TODO(jlewi): We should add some sort of retry.
|
||||
template: buildTemplate {
|
||||
name: "create-namespace",
|
||||
command: util.buildCommand([
|
||||
[
|
||||
"echo",
|
||||
"KUBECONFIG=",
|
||||
"${KUBECONFIG}",
|
||||
],
|
||||
[
|
||||
"gcloud",
|
||||
"auth",
|
||||
"activate-service-account",
|
||||
"--key-file=${GOOGLE_APPLICATION_CREDENTIALS}",
|
||||
],
|
||||
[
|
||||
"kubectl",
|
||||
"config" ,
|
||||
"current-context",
|
||||
],
|
||||
[
|
||||
"kubectl",
|
||||
"create",
|
||||
"namespace",
|
||||
testNamespace,
|
||||
],
|
||||
# Copy the GCP secret from the kubeflow namespace to the test namespace
|
||||
[
|
||||
srcDir + "/test/copy_secret.sh",
|
||||
"kubeflow",
|
||||
testNamespace,
|
||||
"user-gcp-sa",
|
||||
]]
|
||||
),
|
||||
},
|
||||
dependencies: ["get-kubeconfig"],
|
||||
}, // create-namespace
|
||||
{
|
||||
template: buildTemplate {
|
||||
name: "execute-notebook",
|
||||
command: [
|
||||
"python3",
|
||||
"xgboost_test.py",
|
||||
"--name=" + "xgboost-test-" + prowDict["BUILD_ID"],
|
||||
"--namespace=" + testNamespace,
|
||||
"--image=" + executeImage,
|
||||
"--jobType=" + prowDict["JOB_TYPE"],
|
||||
"--pullNumber=" + prowDict["PULL_NUMBER"],
|
||||
"--pullBaseSHA=" + prowDict["PULL_BASE_SHA"],
|
||||
"--cluster=" + params.kfCluster,
|
||||
],
|
||||
pythonPath: kubeflowTestingPy,
|
||||
workingDir: srcDir + "/xgboost_synthetic/testing",
|
||||
},
|
||||
dependencies: ["create-namespace"],
|
||||
}, // execute-notebook
|
||||
];
|
||||
|
||||
// Dag defines the tasks in the graph
|
||||
local dag = {
|
||||
name: "e2e",
|
||||
// Construct tasks from the templates
|
||||
// we will give the steps the same name as the template
|
||||
dag: {
|
||||
tasks: util.toArgoTaskList(dagTemplates),
|
||||
},
|
||||
}; // dag
|
||||
|
||||
// Define templates for the steps to be performed when the
|
||||
// test exits
|
||||
|
||||
local deleteTemplates = if params.deleteNamespace then
|
||||
[
|
||||
{
|
||||
// Delete the namespace
|
||||
// TODO(jlewi): We should add some sort of retry.
|
||||
template: buildTemplate {
|
||||
name: "delete-namespace",
|
||||
command: util.buildCommand([
|
||||
[
|
||||
"gcloud",
|
||||
"auth",
|
||||
"activate-service-account",
|
||||
"--key-file=${GOOGLE_APPLICATION_CREDENTIALS}",
|
||||
],
|
||||
[
|
||||
"kubectl",
|
||||
"delete",
|
||||
"namespace",
|
||||
testNamespace,
|
||||
]]
|
||||
),
|
||||
},
|
||||
}, // delete-namespace
|
||||
] else [];
|
||||
|
||||
local exitTemplates =
|
||||
deleteTemplates +
|
||||
[
|
||||
{
|
||||
// Copy artifacts to GCS for gubernator.
|
||||
// TODO(https://github.com/kubeflow/testing/issues/257): Create-pr-symlink
|
||||
// should be done by run_e2e_workflow.py
|
||||
template: buildTemplate {
|
||||
name: "copy-artifacts",
|
||||
command: [
|
||||
"python",
|
||||
"-m",
|
||||
"kubeflow.testing.prow_artifacts",
|
||||
"--artifacts_dir=" + outputDir,
|
||||
"copy_artifacts",
|
||||
"--bucket=" + bucket,
|
||||
],
|
||||
}, // copy-artifacts,
|
||||
},
|
||||
{
|
||||
// Delete the test directory in NFS.
|
||||
// TODO(https://github.com/kubeflow/testing/issues/256): Use an external process to do this.
|
||||
template:
|
||||
buildTemplate {
|
||||
name: "test-dir-delete",
|
||||
command: [
|
||||
"rm",
|
||||
"-rf",
|
||||
testDir,
|
||||
],
|
||||
|
||||
argoTemplate+: {
|
||||
retryStrategy: {
|
||||
limit: 3,
|
||||
},
|
||||
},
|
||||
}, // test-dir-delete
|
||||
dependencies: ["copy-artifacts"] + if params.deleteNamespace then ["delete-namespace"] else [],
|
||||
},
|
||||
];
|
||||
|
||||
// Create a DAG representing the set of steps to execute on exit
|
||||
local exitDag = {
|
||||
name: "exit-handler",
|
||||
// Construct tasks from the templates
|
||||
// we will give the steps the same name as the template
|
||||
dag: {
|
||||
tasks: util.toArgoTaskList(exitTemplates),
|
||||
},
|
||||
};
|
||||
|
||||
// A list of templates for the actual steps
|
||||
local stepTemplates = std.map(function(i) i.template.argoTemplate
|
||||
, dagTemplates) +
|
||||
std.map(function(i) i.template.argoTemplate
|
||||
, exitTemplates);
|
||||
|
||||
// Define the Argo Workflow.
|
||||
local workflow = {
|
||||
apiVersion: "argoproj.io/v1alpha1",
|
||||
kind: "Workflow",
|
||||
metadata: {
|
||||
name: params.name,
|
||||
namespace: env.namespace,
|
||||
labels: prowDict + {
|
||||
workflow: params.name,
|
||||
workflow_template: workflow_template,
|
||||
},
|
||||
},
|
||||
spec: {
|
||||
entrypoint: "e2e",
|
||||
// Have argo garbage collect old workflows otherwise we overload the API server.
|
||||
ttlSecondsAfterFinished: 7 * 24 * 60 * 60,
|
||||
volumes: [
|
||||
{
|
||||
name: "github-token",
|
||||
secret: {
|
||||
secretName: "github-token",
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "gcp-credentials",
|
||||
secret: {
|
||||
secretName: "kubeflow-testing-credentials",
|
||||
},
|
||||
},
|
||||
{
|
||||
name: params.dataVolume,
|
||||
persistentVolumeClaim: {
|
||||
claimName: params.nfsVolumeClaim,
|
||||
},
|
||||
},
|
||||
], // volumes
|
||||
|
||||
// onExit specifies the template that should always run when the workflow completes.
|
||||
onExit: "exit-handler",
|
||||
|
||||
// The templates will be a combination of the templates
|
||||
// defining the dags executed by Argo as well as the templates
|
||||
// for the individual steps.
|
||||
templates: [dag, exitDag] + stepTemplates, // templates
|
||||
}, // spec
|
||||
}; // workflow
|
||||
|
||||
std.prune(k.core.v1.list.new([workflow]))
|
|
@ -3,7 +3,12 @@
|
|||
# This docker image is based on an existing notebook image
|
||||
# It also includes the dependencies required for training and deploying
|
||||
# this way we can use it as the base image
|
||||
FROM gcr.io/kubeflow-images-public/tensorflow-1.13.1-notebook-cpu:v0.5.0
|
||||
# Note: when using ClusterBuilder I believe the base image gets overwritten
|
||||
FROM gcr.io/kubeflow-images-public/tensorflow-1.14.0-notebook-cpu:v0.7.0
|
||||
|
||||
COPY requirements.txt .
|
||||
|
||||
# We want to install the requirements in the system directory so we need to switch to root
|
||||
USER root
|
||||
RUN pip3 --no-cache-dir install -r requirements.txt
|
||||
USER jovyan
|
||||
|
|
|
@ -29,20 +29,18 @@
|
|||
"source": [
|
||||
"### Verify we have a GCP account\n",
|
||||
"\n",
|
||||
"* The cell below checks that this notebook was spawned with credentials to access GCP\n",
|
||||
"* To add credentials when you created the notebook you should have selected add gcp credential as shown below\n",
|
||||
" \n"
|
||||
"* The cell below checks that this notebook was spawned with credentials to access GCP"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"execution_count": 1,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import os\n",
|
||||
"if not os.getenv(\"GOOGLE_APPLICATION_CREDENTIALS\"):\n",
|
||||
" raise ValueError(\"Notebook is missing google application credentials\")"
|
||||
"from oauth2client.client import GoogleCredentials\n",
|
||||
"credentials = GoogleCredentials.get_application_default()"
|
||||
]
|
||||
},
|
||||
{
|
||||
|
@ -56,49 +54,23 @@
|
|||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"execution_count": 2,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stderr",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"pip installing requirements.txt\n",
|
||||
"pip installing KFP https://storage.googleapis.com/ml-pipeline/release/0.1.32/kfp.tar.gz\n",
|
||||
"pip installing fairing git+git://github.com/kubeflow/fairing.git@7c93e888c3fc98bdf5fb0140e90f6407ce7a807b\n",
|
||||
"Configure docker credentials\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"!pip3 install retrying"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"* Install a specific version of kubeflow-fairing that this example is tested against"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"!pip3 install git+git://github.com/kubeflow/fairing.git@b3db9a548b51eea93250c662defe6470283943b3"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"* Perform some notebook setup"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {
|
||||
"scrolled": false
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import util\n",
|
||||
"from pathlib import Path\n",
|
||||
"import os\n",
|
||||
"\n",
|
||||
"util.notebook_setup()"
|
||||
"import notebook_setup\n",
|
||||
"notebook_setup.notebook_setup()"
|
||||
]
|
||||
},
|
||||
{
|
||||
|
@ -111,7 +83,7 @@
|
|||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"execution_count": 3,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
|
@ -142,11 +114,12 @@
|
|||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"execution_count": 4,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Imports not to be included in the built docker image\n",
|
||||
"import util\n",
|
||||
"import kfp\n",
|
||||
"import kfp.components as comp\n",
|
||||
"import kfp.gcp as gcp\n",
|
||||
|
@ -171,7 +144,7 @@
|
|||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"execution_count": 5,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
|
@ -194,7 +167,7 @@
|
|||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"execution_count": 6,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
|
@ -267,7 +240,7 @@
|
|||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"execution_count": 7,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
|
@ -384,9 +357,90 @@
|
|||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"execution_count": 8,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"model_file=mockup-model.dat\n",
|
||||
"[00:34:17] WARNING: /workspace/src/objective/regression_obj.cu:152: reg:linear is now deprecated in favor of reg:squarederror.\n",
|
||||
"[0]\tvalidation_0-rmse:162.856\n",
|
||||
"Will train until validation_0-rmse hasn't improved in 40 rounds.\n",
|
||||
"[1]\tvalidation_0-rmse:156.25\n",
|
||||
"[2]\tvalidation_0-rmse:150.238\n",
|
||||
"[3]\tvalidation_0-rmse:145.026\n",
|
||||
"[4]\tvalidation_0-rmse:138.321\n",
|
||||
"[5]\tvalidation_0-rmse:131.554\n",
|
||||
"[6]\tvalidation_0-rmse:127.809\n",
|
||||
"[7]\tvalidation_0-rmse:122.574\n",
|
||||
"[8]\tvalidation_0-rmse:117.394\n",
|
||||
"[9]\tvalidation_0-rmse:114.842\n",
|
||||
"[10]\tvalidation_0-rmse:111.601\n",
|
||||
"[11]\tvalidation_0-rmse:108.426\n",
|
||||
"[12]\tvalidation_0-rmse:105.283\n",
|
||||
"[13]\tvalidation_0-rmse:102.916\n",
|
||||
"[14]\tvalidation_0-rmse:101.126\n",
|
||||
"[15]\tvalidation_0-rmse:98.9049\n",
|
||||
"[16]\tvalidation_0-rmse:96.6027\n",
|
||||
"[17]\tvalidation_0-rmse:94.6449\n",
|
||||
"[18]\tvalidation_0-rmse:92.7175\n",
|
||||
"[19]\tvalidation_0-rmse:89.821\n",
|
||||
"[20]\tvalidation_0-rmse:87.785\n",
|
||||
"[21]\tvalidation_0-rmse:85.8316\n",
|
||||
"[22]\tvalidation_0-rmse:84.7495\n",
|
||||
"[23]\tvalidation_0-rmse:83.3638\n",
|
||||
"[24]\tvalidation_0-rmse:81.9553\n",
|
||||
"[25]\tvalidation_0-rmse:80.1649\n",
|
||||
"[26]\tvalidation_0-rmse:79.2545\n",
|
||||
"[27]\tvalidation_0-rmse:77.5626\n",
|
||||
"[28]\tvalidation_0-rmse:75.979\n",
|
||||
"[29]\tvalidation_0-rmse:74.6956\n",
|
||||
"[30]\tvalidation_0-rmse:74.1145\n",
|
||||
"[31]\tvalidation_0-rmse:73.102\n",
|
||||
"[32]\tvalidation_0-rmse:71.9953\n",
|
||||
"[33]\tvalidation_0-rmse:71.2614\n",
|
||||
"[34]\tvalidation_0-rmse:70.4738\n",
|
||||
"[35]\tvalidation_0-rmse:69.6975\n",
|
||||
"[36]\tvalidation_0-rmse:69.0899\n",
|
||||
"[37]\tvalidation_0-rmse:68.6369\n",
|
||||
"[38]\tvalidation_0-rmse:67.6392\n",
|
||||
"[39]\tvalidation_0-rmse:67.153\n",
|
||||
"[40]\tvalidation_0-rmse:66.8115\n",
|
||||
"[41]\tvalidation_0-rmse:66.2017\n",
|
||||
"[42]\tvalidation_0-rmse:65.5889\n",
|
||||
"[43]\tvalidation_0-rmse:64.793\n",
|
||||
"[44]\tvalidation_0-rmse:64.2622\n",
|
||||
"[45]\tvalidation_0-rmse:63.75\n",
|
||||
"[46]\tvalidation_0-rmse:63.0683\n",
|
||||
"[47]\tvalidation_0-rmse:62.5844\n",
|
||||
"[48]\tvalidation_0-rmse:62.4817\n",
|
||||
"[49]\tvalidation_0-rmse:61.9615\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "stderr",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"mean_absolute_error=47.50\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Best RMSE on eval: %.2f with %d rounds 61.961517 50\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "stderr",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Model export success: mockup-model.dat\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"model = ModelServe(model_file=\"mockup-model.dat\")\n",
|
||||
"model.train()"
|
||||
|
@ -404,9 +458,29 @@
|
|||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"execution_count": 9,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"model_file not supplied; using the default\n",
|
||||
"model_file=mockup-model.dat\n",
|
||||
"[00:34:17] WARNING: /workspace/src/objective/regression_obj.cu:152: reg:linear is now deprecated in favor of reg:squarederror.\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"[[361.5152893066406, -99.92890930175781]]"
|
||||
]
|
||||
},
|
||||
"execution_count": 9,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"(train_X, train_y), (test_X, test_y) =read_synthetic_input()\n",
|
||||
"\n",
|
||||
|
@ -441,7 +515,7 @@
|
|||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"execution_count": 10,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
|
@ -468,9 +542,31 @@
|
|||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"execution_count": 25,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stderr",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Converting build-train-deploy.ipynb to build-train-deploy.py\n",
|
||||
"Creating entry point for the class name ModelServe\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"[PosixPath('build-train-deploy.py'),\n",
|
||||
" 'mockup-model.dat',\n",
|
||||
" 'xgboost_util.py',\n",
|
||||
" 'requirements.txt']"
|
||||
]
|
||||
},
|
||||
"execution_count": 25,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"from kubeflow.fairing.builders import cluster\n",
|
||||
"preprocessor = ConvertNotebookPreprocessorWithFire(class_name='ModelServe', notebook_file='build-train-deploy.ipynb')\n",
|
||||
|
@ -497,15 +593,258 @@
|
|||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"execution_count": 26,
|
||||
"metadata": {
|
||||
"scrolled": true
|
||||
},
|
||||
"outputs": [],
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stderr",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Building image using cluster builder.\n",
|
||||
"Creating docker context: /tmp/fairing_context_ybqvdghn\n",
|
||||
"Converting build-train-deploy.ipynb to build-train-deploy.py\n",
|
||||
"Creating entry point for the class name ModelServe\n",
|
||||
"Waiting for fairing-builder-ksmm7-gt427 to start...\n",
|
||||
"Waiting for fairing-builder-ksmm7-gt427 to start...\n",
|
||||
"Waiting for fairing-builder-ksmm7-gt427 to start...\n",
|
||||
"Pod started running True\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"ERROR: logging before flag.Parse: E1025 01:42:23.499654 1 metadata.go:241] Failed to unmarshal scopes: invalid character 'h' looking for beginning of value\n",
|
||||
"\u001b[36mINFO\u001b[0m[0002] Downloading base image gcr.io/kubeflow-images-public/tensorflow-1.13.1-notebook-cpu:v0.5.0\n",
|
||||
"\u001b[36mINFO\u001b[0m[0002] Downloading base image gcr.io/kubeflow-images-public/tensorflow-1.13.1-notebook-cpu:v0.5.0\n",
|
||||
"\u001b[33mWARN\u001b[0m[0002] Error while retrieving image from cache: getting image from path: open /cache/sha256:5aaccf0267f085afd976342a8e943a9c6cefccef5b554df4e15fa7bf15cbd7a3: no such file or directory\n",
|
||||
"\u001b[36mINFO\u001b[0m[0002] Using files from context: [/kaniko/buildcontext/app/requirements.txt]\n",
|
||||
"\u001b[36mINFO\u001b[0m[0002] Checking for cached layer gcr.io/jlewi-dev/fairing-job/fairing-job/cache:864fc6b813659edb48dd37b06d234c939c364db3e60df63a7de4e13b3174f933...\n",
|
||||
"\u001b[36mINFO\u001b[0m[0002] No cached layer found for cmd RUN if [ -e requirements.txt ];then pip install --no-cache -r requirements.txt; fi\n",
|
||||
"\u001b[36mINFO\u001b[0m[0002] Unpacking rootfs as cmd RUN if [ -e requirements.txt ];then pip install --no-cache -r requirements.txt; fi requires it.\n",
|
||||
"\u001b[36mINFO\u001b[0m[0117] Taking snapshot of full filesystem...\n",
|
||||
"\u001b[36mINFO\u001b[0m[0129] Skipping paths under /dev, as it is a whitelisted directory\n",
|
||||
"\u001b[36mINFO\u001b[0m[0129] Skipping paths under /etc/secrets, as it is a whitelisted directory\n",
|
||||
"\u001b[36mINFO\u001b[0m[0129] Skipping paths under /kaniko, as it is a whitelisted directory\n",
|
||||
"\u001b[36mINFO\u001b[0m[0130] Skipping paths under /proc, as it is a whitelisted directory\n",
|
||||
"\u001b[36mINFO\u001b[0m[0130] Skipping paths under /sys, as it is a whitelisted directory\n",
|
||||
"\u001b[36mINFO\u001b[0m[0131] Skipping paths under /var/run, as it is a whitelisted directory\n",
|
||||
"\u001b[36mINFO\u001b[0m[0202] WORKDIR /app/\n",
|
||||
"\u001b[36mINFO\u001b[0m[0202] cmd: workdir\n",
|
||||
"\u001b[36mINFO\u001b[0m[0202] Changed working directory to /app/\n",
|
||||
"\u001b[36mINFO\u001b[0m[0202] Creating directory /app/\n",
|
||||
"\u001b[36mINFO\u001b[0m[0202] Taking snapshot of files...\n",
|
||||
"\u001b[36mINFO\u001b[0m[0202] ENV FAIRING_RUNTIME 1\n",
|
||||
"\u001b[36mINFO\u001b[0m[0202] No files changed in this command, skipping snapshotting.\n",
|
||||
"\u001b[36mINFO\u001b[0m[0202] Using files from context: [/kaniko/buildcontext/app/requirements.txt]\n",
|
||||
"\u001b[36mINFO\u001b[0m[0203] COPY /app//requirements.txt /app/\n",
|
||||
"\u001b[36mINFO\u001b[0m[0203] Taking snapshot of files...\n",
|
||||
"\u001b[36mINFO\u001b[0m[0203] RUN if [ -e requirements.txt ];then pip install --no-cache -r requirements.txt; fi\n",
|
||||
"\u001b[36mINFO\u001b[0m[0203] cmd: /bin/bash\n",
|
||||
"\u001b[36mINFO\u001b[0m[0203] args: [-c if [ -e requirements.txt ];then pip install --no-cache -r requirements.txt; fi]\n",
|
||||
"Collecting fire (from -r requirements.txt (line 1))\n",
|
||||
" Downloading https://files.pythonhosted.org/packages/d9/69/faeaae8687f4de0f5973694d02e9d6c3eb827636a009157352d98de1129e/fire-0.2.1.tar.gz (76kB)\n",
|
||||
"Collecting gitpython (from -r requirements.txt (line 2))\n",
|
||||
" Downloading https://files.pythonhosted.org/packages/aa/25/9fd9f0b05408021736a22ae73f837152c132e4ea85cdd71d186e24efec31/GitPython-3.0.4-py3-none-any.whl (454kB)\n",
|
||||
"Requirement already satisfied: google-cloud-storage in /opt/conda/lib/python3.6/site-packages (from -r requirements.txt (line 3)) (1.14.0)\n",
|
||||
"Collecting joblib (from -r requirements.txt (line 4))\n",
|
||||
" Downloading https://files.pythonhosted.org/packages/8f/42/155696f85f344c066e17af287359c9786b436b1bf86029bb3411283274f3/joblib-0.14.0-py2.py3-none-any.whl (294kB)\n",
|
||||
"Collecting kubeflow-metadata (from -r requirements.txt (line 5))\n",
|
||||
" Downloading https://files.pythonhosted.org/packages/43/b4/3fa3c1a88b8c52695b33acd09189dda8c84ea582acbfd07a1d46f085828c/kubeflow_metadata-0.2.0-py3-none-any.whl (69kB)\n",
|
||||
"Requirement already satisfied: numpy in /opt/conda/lib/python3.6/site-packages (from -r requirements.txt (line 6)) (1.16.2)\n",
|
||||
"Collecting pandas (from -r requirements.txt (line 7))\n",
|
||||
" Downloading https://files.pythonhosted.org/packages/86/12/08b092f6fc9e4c2552e37add0861d0e0e0d743f78f1318973caad970b3fc/pandas-0.25.2-cp36-cp36m-manylinux1_x86_64.whl (10.4MB)\n",
|
||||
"Collecting retrying (from -r requirements.txt (line 8))\n",
|
||||
" Downloading https://files.pythonhosted.org/packages/44/ef/beae4b4ef80902f22e3af073397f079c96969c69b2c7d52a57ea9ae61c9d/retrying-1.3.3.tar.gz\n",
|
||||
"Collecting seldon-core (from -r requirements.txt (line 9))\n",
|
||||
" Downloading https://files.pythonhosted.org/packages/62/25/442db772bc1950864756de2b7cb9f23b0ae0d0997189f3e3eb56e84ea22f/seldon_core-0.4.1-py3-none-any.whl (45kB)\n",
|
||||
"Collecting sklearn (from -r requirements.txt (line 10))\n",
|
||||
" Downloading https://files.pythonhosted.org/packages/1e/7a/dbb3be0ce9bd5c8b7e3d87328e79063f8b263b2b1bfa4774cb1147bfcd3f/sklearn-0.0.tar.gz\n",
|
||||
"Requirement already satisfied: xgboost in /opt/conda/lib/python3.6/site-packages (from -r requirements.txt (line 11)) (0.82)\n",
|
||||
"Collecting tornado>=6.0.3 (from -r requirements.txt (line 12))\n",
|
||||
" Downloading https://files.pythonhosted.org/packages/30/78/2d2823598496127b21423baffaa186b668f73cd91887fcef78b6eade136b/tornado-6.0.3.tar.gz (482kB)\n",
|
||||
"Requirement already satisfied: six in /opt/conda/lib/python3.6/site-packages (from fire->-r requirements.txt (line 1)) (1.12.0)\n",
|
||||
"Requirement already satisfied: termcolor in /opt/conda/lib/python3.6/site-packages (from fire->-r requirements.txt (line 1)) (1.1.0)\n",
|
||||
"Collecting gitdb2>=2.0.0 (from gitpython->-r requirements.txt (line 2))\n",
|
||||
" Downloading https://files.pythonhosted.org/packages/03/6c/99296f89bad2ef85626e1df9f677acbee8885bb043ad82ad3ed4746d2325/gitdb2-2.0.6-py2.py3-none-any.whl (63kB)\n",
|
||||
"Requirement already satisfied: google-resumable-media>=0.3.1 in /opt/conda/lib/python3.6/site-packages (from google-cloud-storage->-r requirements.txt (line 3)) (0.3.2)\n",
|
||||
"Requirement already satisfied: google-cloud-core<0.30dev,>=0.29.0 in /opt/conda/lib/python3.6/site-packages (from google-cloud-storage->-r requirements.txt (line 3)) (0.29.1)\n",
|
||||
"Requirement already satisfied: google-api-core<2.0.0dev,>=1.6.0 in /opt/conda/lib/python3.6/site-packages (from google-cloud-storage->-r requirements.txt (line 3)) (1.9.0)\n",
|
||||
"Requirement already satisfied: pytz>=2017.2 in /opt/conda/lib/python3.6/site-packages (from pandas->-r requirements.txt (line 7)) (2018.9)\n",
|
||||
"Requirement already satisfied: python-dateutil>=2.6.1 in /opt/conda/lib/python3.6/site-packages (from pandas->-r requirements.txt (line 7)) (2.8.0)\n",
|
||||
"Requirement already satisfied: grpcio in /opt/conda/lib/python3.6/site-packages (from seldon-core->-r requirements.txt (line 9)) (1.19.0)\n",
|
||||
"Collecting Flask-OpenTracing==0.2.0 (from seldon-core->-r requirements.txt (line 9))\n",
|
||||
" Downloading https://files.pythonhosted.org/packages/1d/c4/0546b854a3f42af9ef959df9bd1108903698e175e7a07c057cdfaeeef718/Flask_OpenTracing-0.2.0-py2.py3-none-any.whl\n",
|
||||
"Collecting flatbuffers (from seldon-core->-r requirements.txt (line 9))\n",
|
||||
" Downloading https://files.pythonhosted.org/packages/c9/84/adf5837f96c39990bc55afdfddf460b38b4562f50341359afa32e4a98de7/flatbuffers-1.11-py2.py3-none-any.whl\n",
|
||||
"Collecting minio>=4.0.9 (from seldon-core->-r requirements.txt (line 9))\n",
|
||||
" Downloading https://files.pythonhosted.org/packages/ba/17/6084f63de9bd7c6d47b5aab719d6246c01d74d4aaad373e0142a666080cc/minio-5.0.1-py2.py3-none-any.whl (62kB)\n",
|
||||
"Requirement already satisfied: requests in /opt/conda/lib/python3.6/site-packages (from seldon-core->-r requirements.txt (line 9)) (2.21.0)\n",
|
||||
"Collecting flask-cors (from seldon-core->-r requirements.txt (line 9))\n",
|
||||
" Downloading https://files.pythonhosted.org/packages/78/38/e68b11daa5d613e3a91e4bf3da76c94ac9ee0d9cd515af9c1ab80d36f709/Flask_Cors-3.0.8-py2.py3-none-any.whl\n",
|
||||
"Requirement already satisfied: pyyaml in /opt/conda/lib/python3.6/site-packages (from seldon-core->-r requirements.txt (line 9)) (5.1)\n",
|
||||
"Requirement already satisfied: protobuf in /opt/conda/lib/python3.6/site-packages (from seldon-core->-r requirements.txt (line 9)) (3.7.1)\n",
|
||||
"Collecting opentracing<2,>=1.2.2 (from seldon-core->-r requirements.txt (line 9))\n",
|
||||
" Downloading https://files.pythonhosted.org/packages/06/c2/90b35a1abdc639a5c6000d8202c70217d60e80f5b328870efb73fda71115/opentracing-1.3.0.tar.gz\n",
|
||||
"Collecting flask (from seldon-core->-r requirements.txt (line 9))\n",
|
||||
" Downloading https://files.pythonhosted.org/packages/9b/93/628509b8d5dc749656a9641f4caf13540e2cdec85276964ff8f43bbb1d3b/Flask-1.1.1-py2.py3-none-any.whl (94kB)\n",
|
||||
"Collecting grpcio-opentracing (from seldon-core->-r requirements.txt (line 9))\n",
|
||||
" Downloading https://files.pythonhosted.org/packages/db/82/2fcad380697c3dab25de76ee590bcab3eb9bbfb4add916044d7e83ec2b10/grpcio_opentracing-1.1.4-py3-none-any.whl\n",
|
||||
"Requirement already satisfied: tensorflow in /opt/conda/lib/python3.6/site-packages (from seldon-core->-r requirements.txt (line 9)) (1.13.1)\n",
|
||||
"Collecting gunicorn>=19.9.0 (from seldon-core->-r requirements.txt (line 9))\n",
|
||||
" Downloading https://files.pythonhosted.org/packages/8c/da/b8dd8deb741bff556db53902d4706774c8e1e67265f69528c14c003644e6/gunicorn-19.9.0-py2.py3-none-any.whl (112kB)\n",
|
||||
"Collecting jaeger-client==3.13.0 (from seldon-core->-r requirements.txt (line 9))\n",
|
||||
" Downloading https://files.pythonhosted.org/packages/c8/a2/e9bd04cd660cbdffe0598173be068be23099fbd68e7a4a89b74440509130/jaeger-client-3.13.0.tar.gz (77kB)\n",
|
||||
"Collecting azure-storage-blob>=2.0.1 (from seldon-core->-r requirements.txt (line 9))\n",
|
||||
" Downloading https://files.pythonhosted.org/packages/3e/84/610f379b46d7d3c2d48eadeed6a12b6d46a43100fea70534f5992d0ac996/azure_storage_blob-2.1.0-py2.py3-none-any.whl (88kB)\n",
|
||||
"Collecting redis (from seldon-core->-r requirements.txt (line 9))\n",
|
||||
" Downloading https://files.pythonhosted.org/packages/32/ae/28613a62eea0d53d3db3147f8715f90da07667e99baeedf1010eb400f8c0/redis-3.3.11-py2.py3-none-any.whl (66kB)\n",
|
||||
"Collecting scikit-learn (from sklearn->-r requirements.txt (line 10))\n",
|
||||
" Downloading https://files.pythonhosted.org/packages/a0/c5/d2238762d780dde84a20b8c761f563fe882b88c5a5fb03c056547c442a19/scikit_learn-0.21.3-cp36-cp36m-manylinux1_x86_64.whl (6.7MB)\n",
|
||||
"Requirement already satisfied: scipy in /opt/conda/lib/python3.6/site-packages (from xgboost->-r requirements.txt (line 11)) (1.2.1)\n",
|
||||
"Collecting smmap2>=2.0.0 (from gitdb2>=2.0.0->gitpython->-r requirements.txt (line 2))\n",
|
||||
" Downloading https://files.pythonhosted.org/packages/55/d2/866d45e3a121ee15a1dc013824d58072fd5c7799c9c34d01378eb262ca8f/smmap2-2.0.5-py2.py3-none-any.whl\n",
|
||||
"Requirement already satisfied: googleapis-common-protos!=1.5.4,<2.0dev,>=1.5.3 in /opt/conda/lib/python3.6/site-packages (from google-api-core<2.0.0dev,>=1.6.0->google-cloud-storage->-r requirements.txt (line 3)) (1.5.9)\n",
|
||||
"Requirement already satisfied: google-auth<2.0dev,>=0.4.0 in /opt/conda/lib/python3.6/site-packages (from google-api-core<2.0.0dev,>=1.6.0->google-cloud-storage->-r requirements.txt (line 3)) (1.6.3)\n",
|
||||
"Requirement already satisfied: setuptools>=34.0.0 in /opt/conda/lib/python3.6/site-packages (from google-api-core<2.0.0dev,>=1.6.0->google-cloud-storage->-r requirements.txt (line 3)) (40.9.0)\n",
|
||||
"Requirement already satisfied: future in /opt/conda/lib/python3.6/site-packages (from minio>=4.0.9->seldon-core->-r requirements.txt (line 9)) (0.17.1)\n",
|
||||
"Requirement already satisfied: urllib3 in /opt/conda/lib/python3.6/site-packages (from minio>=4.0.9->seldon-core->-r requirements.txt (line 9)) (1.24.1)\n",
|
||||
"Requirement already satisfied: certifi in /opt/conda/lib/python3.6/site-packages (from minio>=4.0.9->seldon-core->-r requirements.txt (line 9)) (2019.3.9)\n",
|
||||
"Requirement already satisfied: idna<2.9,>=2.5 in /opt/conda/lib/python3.6/site-packages (from requests->seldon-core->-r requirements.txt (line 9)) (2.8)\n",
|
||||
"Requirement already satisfied: chardet<3.1.0,>=3.0.2 in /opt/conda/lib/python3.6/site-packages (from requests->seldon-core->-r requirements.txt (line 9)) (3.0.4)\n",
|
||||
"Collecting Jinja2>=2.10.1 (from flask->seldon-core->-r requirements.txt (line 9))\n",
|
||||
" Downloading https://files.pythonhosted.org/packages/65/e0/eb35e762802015cab1ccee04e8a277b03f1d8e53da3ec3106882ec42558b/Jinja2-2.10.3-py2.py3-none-any.whl (125kB)\n",
|
||||
"Requirement already satisfied: Werkzeug>=0.15 in /opt/conda/lib/python3.6/site-packages (from flask->seldon-core->-r requirements.txt (line 9)) (0.15.2)\n",
|
||||
"Collecting itsdangerous>=0.24 (from flask->seldon-core->-r requirements.txt (line 9))\n",
|
||||
" Downloading https://files.pythonhosted.org/packages/76/ae/44b03b253d6fade317f32c24d100b3b35c2239807046a4c953c7b89fa49e/itsdangerous-1.1.0-py2.py3-none-any.whl\n",
|
||||
"Collecting click>=5.1 (from flask->seldon-core->-r requirements.txt (line 9))\n",
|
||||
" Downloading https://files.pythonhosted.org/packages/fa/37/45185cb5abbc30d7257104c434fe0b07e5a195a6847506c074527aa599ec/Click-7.0-py2.py3-none-any.whl (81kB)\n",
|
||||
"Requirement already satisfied: tensorboard<1.14.0,>=1.13.0 in /opt/conda/lib/python3.6/site-packages (from tensorflow->seldon-core->-r requirements.txt (line 9)) (1.13.1)\n",
|
||||
"Requirement already satisfied: keras-applications>=1.0.6 in /opt/conda/lib/python3.6/site-packages (from tensorflow->seldon-core->-r requirements.txt (line 9)) (1.0.7)\n",
|
||||
"Requirement already satisfied: astor>=0.6.0 in /opt/conda/lib/python3.6/site-packages (from tensorflow->seldon-core->-r requirements.txt (line 9)) (0.7.1)\n",
|
||||
"Requirement already satisfied: tensorflow-estimator<1.14.0rc0,>=1.13.0 in /opt/conda/lib/python3.6/site-packages (from tensorflow->seldon-core->-r requirements.txt (line 9)) (1.13.0)\n",
|
||||
"Requirement already satisfied: gast>=0.2.0 in /opt/conda/lib/python3.6/site-packages (from tensorflow->seldon-core->-r requirements.txt (line 9)) (0.2.2)\n",
|
||||
"Requirement already satisfied: keras-preprocessing>=1.0.5 in /opt/conda/lib/python3.6/site-packages (from tensorflow->seldon-core->-r requirements.txt (line 9)) (1.0.9)\n",
|
||||
"Requirement already satisfied: wheel>=0.26 in /opt/conda/lib/python3.6/site-packages (from tensorflow->seldon-core->-r requirements.txt (line 9)) (0.33.1)\n",
|
||||
"Requirement already satisfied: absl-py>=0.1.6 in /opt/conda/lib/python3.6/site-packages (from tensorflow->seldon-core->-r requirements.txt (line 9)) (0.7.1)\n",
|
||||
"Collecting threadloop<2,>=1 (from jaeger-client==3.13.0->seldon-core->-r requirements.txt (line 9))\n",
|
||||
" Downloading https://files.pythonhosted.org/packages/d3/1d/8398c1645b97dc008d3c658e04beda01ede3d90943d40c8d56863cf891bd/threadloop-1.0.2.tar.gz\n",
|
||||
"Collecting thrift (from jaeger-client==3.13.0->seldon-core->-r requirements.txt (line 9))\n",
|
||||
" Downloading https://files.pythonhosted.org/packages/c6/b4/510617906f8e0c5660e7d96fbc5585113f83ad547a3989b80297ac72a74c/thrift-0.11.0.tar.gz (52kB)\n",
|
||||
"Collecting azure-common>=1.1.5 (from azure-storage-blob>=2.0.1->seldon-core->-r requirements.txt (line 9))\n",
|
||||
" Downloading https://files.pythonhosted.org/packages/00/55/a703923c12cd3172d5c007beda0c1a34342a17a6a72779f8a7c269af0cd6/azure_common-1.1.23-py2.py3-none-any.whl\n",
|
||||
"Collecting azure-storage-common~=2.1 (from azure-storage-blob>=2.0.1->seldon-core->-r requirements.txt (line 9))\n",
|
||||
" Downloading https://files.pythonhosted.org/packages/6b/a0/6794b318ce0118d1a4053bdf0149a60807407db9b710354f2b203c2f5975/azure_storage_common-2.1.0-py2.py3-none-any.whl (47kB)\n",
|
||||
"Requirement already satisfied: cachetools>=2.0.0 in /opt/conda/lib/python3.6/site-packages (from google-auth<2.0dev,>=0.4.0->google-api-core<2.0.0dev,>=1.6.0->google-cloud-storage->-r requirements.txt (line 3)) (3.1.0)\n",
|
||||
"Requirement already satisfied: pyasn1-modules>=0.2.1 in /opt/conda/lib/python3.6/site-packages (from google-auth<2.0dev,>=0.4.0->google-api-core<2.0.0dev,>=1.6.0->google-cloud-storage->-r requirements.txt (line 3)) (0.2.4)\n",
|
||||
"Requirement already satisfied: rsa>=3.1.4 in /opt/conda/lib/python3.6/site-packages (from google-auth<2.0dev,>=0.4.0->google-api-core<2.0.0dev,>=1.6.0->google-cloud-storage->-r requirements.txt (line 3)) (4.0)\n",
|
||||
"Requirement already satisfied: MarkupSafe>=0.23 in /opt/conda/lib/python3.6/site-packages (from Jinja2>=2.10.1->flask->seldon-core->-r requirements.txt (line 9)) (1.1.1)\n",
|
||||
"Requirement already satisfied: markdown>=2.6.8 in /opt/conda/lib/python3.6/site-packages (from tensorboard<1.14.0,>=1.13.0->tensorflow->seldon-core->-r requirements.txt (line 9)) (3.1)\n",
|
||||
"Requirement already satisfied: h5py in /opt/conda/lib/python3.6/site-packages (from keras-applications>=1.0.6->tensorflow->seldon-core->-r requirements.txt (line 9)) (2.9.0)\n",
|
||||
"Requirement already satisfied: mock>=2.0.0 in /opt/conda/lib/python3.6/site-packages (from tensorflow-estimator<1.14.0rc0,>=1.13.0->tensorflow->seldon-core->-r requirements.txt (line 9)) (2.0.0)\n",
|
||||
"Requirement already satisfied: cryptography in /opt/conda/lib/python3.6/site-packages (from azure-storage-common~=2.1->azure-storage-blob>=2.0.1->seldon-core->-r requirements.txt (line 9)) (2.6.1)\n",
|
||||
"Requirement already satisfied: pyasn1<0.5.0,>=0.4.1 in /opt/conda/lib/python3.6/site-packages (from pyasn1-modules>=0.2.1->google-auth<2.0dev,>=0.4.0->google-api-core<2.0.0dev,>=1.6.0->google-cloud-storage->-r requirements.txt (line 3)) (0.4.5)\n",
|
||||
"Requirement already satisfied: pbr>=0.11 in /opt/conda/lib/python3.6/site-packages (from mock>=2.0.0->tensorflow-estimator<1.14.0rc0,>=1.13.0->tensorflow->seldon-core->-r requirements.txt (line 9)) (5.1.3)\n",
|
||||
"Requirement already satisfied: cffi!=1.11.3,>=1.8 in /opt/conda/lib/python3.6/site-packages (from cryptography->azure-storage-common~=2.1->azure-storage-blob>=2.0.1->seldon-core->-r requirements.txt (line 9)) (1.12.2)\n",
|
||||
"Requirement already satisfied: asn1crypto>=0.21.0 in /opt/conda/lib/python3.6/site-packages (from cryptography->azure-storage-common~=2.1->azure-storage-blob>=2.0.1->seldon-core->-r requirements.txt (line 9)) (0.24.0)\n",
|
||||
"Requirement already satisfied: pycparser in /opt/conda/lib/python3.6/site-packages (from cffi!=1.11.3,>=1.8->cryptography->azure-storage-common~=2.1->azure-storage-blob>=2.0.1->seldon-core->-r requirements.txt (line 9)) (2.19)\n",
|
||||
"fairing 0.5 has requirement tornado<6.0.0,>=5.1.1, but you'll have tornado 6.0.3 which is incompatible.\n",
|
||||
"jaeger-client 3.13.0 has requirement tornado<5,>=4.3, but you'll have tornado 6.0.3 which is incompatible.\n",
|
||||
"seldon-core 0.4.1 has requirement google-cloud-storage>=1.16.0, but you'll have google-cloud-storage 1.14.0 which is incompatible.\n",
|
||||
"Installing collected packages: fire, smmap2, gitdb2, gitpython, joblib, kubeflow-metadata, pandas, retrying, opentracing, Jinja2, itsdangerous, click, flask, Flask-OpenTracing, flatbuffers, minio, flask-cors, grpcio-opentracing, gunicorn, tornado, threadloop, thrift, jaeger-client, azure-common, azure-storage-common, azure-storage-blob, redis, seldon-core, scikit-learn, sklearn\n",
|
||||
" Running setup.py install for fire: started\n",
|
||||
" Running setup.py install for fire: finished with status 'done'\n",
|
||||
" Running setup.py install for retrying: started\n",
|
||||
" Running setup.py install for retrying: finished with status 'done'\n",
|
||||
" Running setup.py install for opentracing: started\n",
|
||||
" Running setup.py install for opentracing: finished with status 'done'\n",
|
||||
" Found existing installation: Jinja2 2.10\n",
|
||||
" Uninstalling Jinja2-2.10:\n",
|
||||
" Successfully uninstalled Jinja2-2.10\n",
|
||||
" Found existing installation: tornado 5.1.1\n",
|
||||
" Uninstalling tornado-5.1.1:\n",
|
||||
" Successfully uninstalled tornado-5.1.1\n",
|
||||
" Running setup.py install for tornado: started\n",
|
||||
" Running setup.py install for tornado: finished with status 'done'\n",
|
||||
" Running setup.py install for threadloop: started\n",
|
||||
" Running setup.py install for threadloop: finished with status 'done'\n",
|
||||
" Running setup.py install for thrift: started\n",
|
||||
" Running setup.py install for thrift: finished with status 'done'\n",
|
||||
" Running setup.py install for jaeger-client: started\n",
|
||||
" Running setup.py install for jaeger-client: finished with status 'done'\n",
|
||||
" Running setup.py install for sklearn: started\n",
|
||||
" Running setup.py install for sklearn: finished with status 'done'\n",
|
||||
"Successfully installed Flask-OpenTracing-0.2.0 Jinja2-2.10.3 azure-common-1.1.23 azure-storage-blob-2.1.0 azure-storage-common-2.1.0 click-7.0 fire-0.2.1 flask-1.1.1 flask-cors-3.0.8 flatbuffers-1.11 gitdb2-2.0.6 gitpython-3.0.4 grpcio-opentracing-1.1.4 gunicorn-19.9.0 itsdangerous-1.1.0 jaeger-client-3.13.0 joblib-0.14.0 kubeflow-metadata-0.2.0 minio-5.0.1 opentracing-1.3.0 pandas-0.25.2 redis-3.3.11 retrying-1.3.3 scikit-learn-0.21.3 seldon-core-0.4.1 sklearn-0.0 smmap2-2.0.5 threadloop-1.0.2 thrift-0.11.0 tornado-6.0.3\n",
|
||||
"You are using pip version 19.0.1, however version 19.3.1 is available.\n",
|
||||
"You should consider upgrading via the 'pip install --upgrade pip' command.\n",
|
||||
"\u001b[36mINFO\u001b[0m[0240] Taking snapshot of full filesystem...\n",
|
||||
"\u001b[36mINFO\u001b[0m[0241] Skipping paths under /dev, as it is a whitelisted directory\n",
|
||||
"\u001b[36mINFO\u001b[0m[0241] Skipping paths under /etc/secrets, as it is a whitelisted directory\n",
|
||||
"\u001b[36mINFO\u001b[0m[0241] Skipping paths under /kaniko, as it is a whitelisted directory\n",
|
||||
"\u001b[36mINFO\u001b[0m[0242] Skipping paths under /proc, as it is a whitelisted directory\n",
|
||||
"\u001b[36mINFO\u001b[0m[0242] Skipping paths under /sys, as it is a whitelisted directory\n",
|
||||
"\u001b[36mINFO\u001b[0m[0243] Skipping paths under /var/run, as it is a whitelisted directory\n",
|
||||
"\u001b[36mINFO\u001b[0m[0243] Adding whiteout for /opt/conda/lib/python3.6/site-packages/tornado/platform/__pycache__/kqueue.cpython-36.pyc\n",
|
||||
"\u001b[36mINFO\u001b[0m[0243] Adding whiteout for /opt/conda/lib/python3.6/site-packages/tornado-5.1.1-py3.6.egg-info\n",
|
||||
"\u001b[36mINFO\u001b[0m[0243] Adding whiteout for /opt/conda/lib/python3.6/site-packages/tornado/stack_context.py\n",
|
||||
"\u001b[36mINFO\u001b[0m[0243] Adding whiteout for /opt/conda/lib/python3.6/site-packages/Jinja2-2.10.dist-info\n",
|
||||
"\u001b[36mINFO\u001b[0m[0243] Adding whiteout for /opt/conda/lib/python3.6/site-packages/tornado/test/stack_context_test.py\n",
|
||||
"\u001b[36mINFO\u001b[0m[0243] Adding whiteout for /opt/conda/lib/python3.6/site-packages/tornado/platform/kqueue.py\n",
|
||||
"\u001b[36mINFO\u001b[0m[0243] Adding whiteout for /opt/conda/lib/python3.6/site-packages/tornado/platform/epoll.py\n",
|
||||
"\u001b[36mINFO\u001b[0m[0243] Adding whiteout for /opt/conda/lib/python3.6/site-packages/tornado/platform/__pycache__/select.cpython-36.pyc\n",
|
||||
"\u001b[36mINFO\u001b[0m[0243] Adding whiteout for /opt/conda/lib/python3.6/site-packages/tornado/platform/common.py\n",
|
||||
"\u001b[36mINFO\u001b[0m[0243] Adding whiteout for /opt/conda/lib/python3.6/site-packages/tornado/platform/__pycache__/common.cpython-36.pyc\n",
|
||||
"\u001b[36mINFO\u001b[0m[0243] Adding whiteout for /opt/conda/lib/python3.6/site-packages/tornado/__pycache__/stack_context.cpython-36.pyc\n",
|
||||
"\u001b[36mINFO\u001b[0m[0243] Adding whiteout for /opt/conda/lib/python3.6/site-packages/tornado/test/__pycache__/stack_context_test.cpython-36.pyc\n",
|
||||
"\u001b[36mINFO\u001b[0m[0243] Adding whiteout for /opt/conda/lib/python3.6/site-packages/tornado/platform/__pycache__/epoll.cpython-36.pyc\n",
|
||||
"\u001b[36mINFO\u001b[0m[0243] Adding whiteout for /opt/conda/lib/python3.6/site-packages/tornado/platform/select.py\n",
|
||||
"\u001b[36mINFO\u001b[0m[0277] Using files from context: [/kaniko/buildcontext/app]\n",
|
||||
"\u001b[36mINFO\u001b[0m[0277] Pushing layer gcr.io/jlewi-dev/fairing-job/fairing-job/cache:864fc6b813659edb48dd37b06d234c939c364db3e60df63a7de4e13b3174f933 to cache now\n",
|
||||
"\u001b[36mINFO\u001b[0m[0277] COPY /app/ /app/\n",
|
||||
"\u001b[36mINFO\u001b[0m[0277] Taking snapshot of files...\n",
|
||||
"2019/10/25 01:47:01 pushed blob sha256:671fd5dc4379ffdf4694c30fd98b8b6bae9213cdff0939b936debf0f22f78708\n",
|
||||
"2019/10/25 01:47:05 pushed blob sha256:8da7bddc0c459ae3160be07163f4012ef7befef6ae05c198bead57633e46e770\n",
|
||||
"2019/10/25 01:47:05 gcr.io/jlewi-dev/fairing-job/fairing-job/cache:864fc6b813659edb48dd37b06d234c939c364db3e60df63a7de4e13b3174f933: digest: sha256:2339a62186a93347f3bb9bc85456045d45dc9152793ccc5164210b58aab5512b size: 429\n",
|
||||
"2019/10/25 01:47:05 existing blob: sha256:9ad0c8331ed7f0f76b54d8e91e66661a3ca35e02a25cc83ccb48d51fa89e5573\n",
|
||||
"2019/10/25 01:47:05 existing blob: sha256:ff51e784988b3a953df5d6ba36b982436c2b16a77eb081ce7a589ca67d04144c\n",
|
||||
"2019/10/25 01:47:05 existing blob: sha256:969fc9c5501e60432ca0bc4b635493feb2f90e14822d2f3e3f79742fed96757d\n",
|
||||
"2019/10/25 01:47:05 existing blob: sha256:432f7fba907384de9a5c1c23aed93fa3eff7d6a8d89a91f5eab99f41aa889323\n",
|
||||
"2019/10/25 01:47:05 existing blob: sha256:8485e620dff15e8a69076ac02f6b23ffb3408161cdc2c0572905838765a84854\n",
|
||||
"2019/10/25 01:47:05 existing blob: sha256:398d32b153e84fe343f0c5b07d65e89b05551aae6cb8b3a03bb2b662976eb3b8\n",
|
||||
"2019/10/25 01:47:05 existing blob: sha256:47956fc6abae87d70180bc4f0efdad014b8e2a3b617a447ac01f674336737dfc\n",
|
||||
"2019/10/25 01:47:05 existing blob: sha256:8da7bddc0c459ae3160be07163f4012ef7befef6ae05c198bead57633e46e770\n",
|
||||
"2019/10/25 01:47:05 existing blob: sha256:59951887a0c1d1a227f43219b3bc84562a6f2a7e0ab5c276fbd9eaba6ebec02d\n",
|
||||
"2019/10/25 01:47:05 existing blob: sha256:bd5e67bf2947497b4a4347d2751797d6b3a40f0dc5d355185815ee6da1b8ae0c\n",
|
||||
"2019/10/25 01:47:05 existing blob: sha256:124c757242f88002a858c23fc79f8262f9587fa30fd92507e586ad074afb42b6\n",
|
||||
"2019/10/25 01:47:05 existing blob: sha256:167108358fe643eea57fc595ff9b76a1a7e09e022c84d724346ce5b41d0148bc\n",
|
||||
"2019/10/25 01:47:05 existing blob: sha256:62228d5c51598033083adbf71e8ee3d8d523d7d6d8c9d789b8c8a2d71ca988ac\n",
|
||||
"2019/10/25 01:47:05 existing blob: sha256:22ea01b3a354ebdcf4386e6d2f53b6cf65bd9cdcb34a70f32e00b90a477589d0\n",
|
||||
"2019/10/25 01:47:05 existing blob: sha256:c451d20886c33c47dab7b01b05ece292ee5173a9a4aced925035401a6b1de62e\n",
|
||||
"2019/10/25 01:47:05 existing blob: sha256:fa3f2f277e67c5cbbf1dac21dc27111a60d3cd2ef494d94aa1515d3319f2a245\n",
|
||||
"2019/10/25 01:47:05 existing blob: sha256:547e89bdafacadd9655a394a9d73c49c9890233c0cd244cbc5b1cb859be1395c\n",
|
||||
"2019/10/25 01:47:05 existing blob: sha256:afde35469481d2bc446d649a7a3d099147bbf7696b66333e76a411686b617ea1\n",
|
||||
"2019/10/25 01:47:05 existing blob: sha256:9d866f8bde2a0d607a6d17edc0fbd5e00b58306efc2b0a57e0ba72f269e7c6be\n",
|
||||
"2019/10/25 01:47:05 existing blob: sha256:86db56dbcdfc4e5ba205e00f3de178548dd0fcd3d1d9ec011747ca0bb08a8177\n",
|
||||
"2019/10/25 01:47:05 existing blob: sha256:9ab35225e174496943b6a86bf62d004409479cf722ef1d3e01ca48afc8cfaa79\n",
|
||||
"2019/10/25 01:47:05 existing blob: sha256:147c5bbff888fc9cddffd4078daa35bba0d1d6f6c7175a1acb144412a43b3fce\n",
|
||||
"2019/10/25 01:47:07 pushed blob sha256:80d3506bc094600aada9dc076b44354b134277700f2420838db7b742c50533ed\n",
|
||||
"2019/10/25 01:47:07 pushed blob sha256:2e67912c44ec0aadea8c990a4a8fc882e4655a798807840977b49b5a972eb47d\n",
|
||||
"2019/10/25 01:47:07 pushed blob sha256:2f2b9c4bf759eaf2afb42e189cc50b21d4614d1892227349409d012a90355268\n",
|
||||
"2019/10/25 01:47:07 pushed blob sha256:5831cf619d1fb5d7b9430a0943017516edf2d83451941d468c78479b73f65975\n",
|
||||
"2019/10/25 01:47:08 gcr.io/jlewi-dev/fairing-job/fairing-job:A486B058: digest: sha256:bf1c54b7880b81f232c15f31a0af74a70550e2eedffd2c9ff289f32f4b8d85fa size: 4325\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"# Use a stock jupyter image as our base image\n",
|
||||
"# TODO(jlewi): Should we try to use the downward API to default to the image we are running in?\n",
|
||||
"# TODO(https://github.com/kubeflow/fairing/issues/404): We need to fix 404\n",
|
||||
"# before we can upgrade to the 0.7.0 image as the base image.\n",
|
||||
"# We will need to use that to set the Dockerfile used by ClusterBuilder\n",
|
||||
"# base_image = \"gcr.io/kubeflow-images-public/tensorflow-1.14.0-notebook-cpu:v0.7.0\"\n",
|
||||
"base_image = \"gcr.io/kubeflow-images-public/tensorflow-1.13.1-notebook-cpu:v0.5.0\"\n",
|
||||
"\n",
|
||||
"cluster_builder = cluster.cluster.ClusterBuilder(registry=DOCKER_REGISTRY,\n",
|
||||
" base_image=base_image,\n",
|
||||
" preprocessor=preprocessor,\n",
|
||||
|
@ -538,11 +877,63 @@
|
|||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"execution_count": 27,
|
||||
"metadata": {
|
||||
"scrolled": true
|
||||
},
|
||||
"outputs": [],
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stderr",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Converting build-train-deploy.ipynb to build-train-deploy.py\n",
|
||||
"Creating entry point for the class name ModelServe\n",
|
||||
"Building image using Append builder...\n",
|
||||
"Creating docker context: /tmp/fairing_context_41v9y1k9\n",
|
||||
"Converting build-train-deploy.ipynb to build-train-deploy.py\n",
|
||||
"Creating entry point for the class name ModelServe\n",
|
||||
"build-train-deploy.py already exists in Fairing context, skipping...\n",
|
||||
"Loading Docker credentials for repository 'gcr.io/jlewi-dev/fairing-job/fairing-job:A486B058'\n",
|
||||
"Invoking 'docker-credential-gcloud' to obtain Docker credentials.\n",
|
||||
"Successfully obtained Docker credentials.\n",
|
||||
"Image successfully built in 2.0983306730049662s.\n",
|
||||
"Pushing image gcr.io/jlewi-dev/fairing-job/fairing-job:7935B6A7...\n",
|
||||
"Loading Docker credentials for repository 'gcr.io/jlewi-dev/fairing-job/fairing-job:7935B6A7'\n",
|
||||
"Invoking 'docker-credential-gcloud' to obtain Docker credentials.\n",
|
||||
"Successfully obtained Docker credentials.\n",
|
||||
"Uploading gcr.io/jlewi-dev/fairing-job/fairing-job:7935B6A7\n",
|
||||
"Layer sha256:80d3506bc094600aada9dc076b44354b134277700f2420838db7b742c50533ed exists, skipping\n",
|
||||
"Layer sha256:8da7bddc0c459ae3160be07163f4012ef7befef6ae05c198bead57633e46e770 exists, skipping\n",
|
||||
"Layer sha256:59951887a0c1d1a227f43219b3bc84562a6f2a7e0ab5c276fbd9eaba6ebec02d exists, skipping\n",
|
||||
"Layer sha256:9d866f8bde2a0d607a6d17edc0fbd5e00b58306efc2b0a57e0ba72f269e7c6be exists, skipping\n",
|
||||
"Layer sha256:62228d5c51598033083adbf71e8ee3d8d523d7d6d8c9d789b8c8a2d71ca988ac exists, skipping\n",
|
||||
"Layer sha256:9ab35225e174496943b6a86bf62d004409479cf722ef1d3e01ca48afc8cfaa79 exists, skipping\n",
|
||||
"Layer sha256:bd5e67bf2947497b4a4347d2751797d6b3a40f0dc5d355185815ee6da1b8ae0c exists, skipping\n",
|
||||
"Layer sha256:5831cf619d1fb5d7b9430a0943017516edf2d83451941d468c78479b73f65975 exists, skipping\n",
|
||||
"Layer sha256:8485e620dff15e8a69076ac02f6b23ffb3408161cdc2c0572905838765a84854 exists, skipping\n",
|
||||
"Layer sha256:124c757242f88002a858c23fc79f8262f9587fa30fd92507e586ad074afb42b6 exists, skipping\n",
|
||||
"Layer sha256:2f2b9c4bf759eaf2afb42e189cc50b21d4614d1892227349409d012a90355268 exists, skipping\n",
|
||||
"Layer sha256:ff51e784988b3a953df5d6ba36b982436c2b16a77eb081ce7a589ca67d04144c exists, skipping\n",
|
||||
"Layer sha256:167108358fe643eea57fc595ff9b76a1a7e09e022c84d724346ce5b41d0148bc exists, skipping\n",
|
||||
"Layer sha256:432f7fba907384de9a5c1c23aed93fa3eff7d6a8d89a91f5eab99f41aa889323 exists, skipping\n",
|
||||
"Layer sha256:afde35469481d2bc446d649a7a3d099147bbf7696b66333e76a411686b617ea1 exists, skipping\n",
|
||||
"Layer sha256:969fc9c5501e60432ca0bc4b635493feb2f90e14822d2f3e3f79742fed96757d exists, skipping\n",
|
||||
"Layer sha256:22ea01b3a354ebdcf4386e6d2f53b6cf65bd9cdcb34a70f32e00b90a477589d0 exists, skipping\n",
|
||||
"Layer sha256:86db56dbcdfc4e5ba205e00f3de178548dd0fcd3d1d9ec011747ca0bb08a8177 exists, skipping\n",
|
||||
"Layer sha256:c451d20886c33c47dab7b01b05ece292ee5173a9a4aced925035401a6b1de62e exists, skipping\n",
|
||||
"Layer sha256:398d32b153e84fe343f0c5b07d65e89b05551aae6cb8b3a03bb2b662976eb3b8 exists, skipping\n",
|
||||
"Layer sha256:47956fc6abae87d70180bc4f0efdad014b8e2a3b617a447ac01f674336737dfc exists, skipping\n",
|
||||
"Layer sha256:9ad0c8331ed7f0f76b54d8e91e66661a3ca35e02a25cc83ccb48d51fa89e5573 exists, skipping\n",
|
||||
"Layer sha256:fa3f2f277e67c5cbbf1dac21dc27111a60d3cd2ef494d94aa1515d3319f2a245 exists, skipping\n",
|
||||
"Layer sha256:547e89bdafacadd9655a394a9d73c49c9890233c0cd244cbc5b1cb859be1395c exists, skipping\n",
|
||||
"Layer sha256:147c5bbff888fc9cddffd4078daa35bba0d1d6f6c7175a1acb144412a43b3fce exists, skipping\n",
|
||||
"Layer sha256:a4bc27d300aa1fec30a6da6b44b05c58052675425cb5b92e11cc081dec5af3aa pushed.\n",
|
||||
"Layer sha256:f2b9523fe427b5599019aff069e474f3a7bcd829aeb084a174cd8610df588068 pushed.\n",
|
||||
"Finished upload of: gcr.io/jlewi-dev/fairing-job/fairing-job:7935B6A7\n",
|
||||
"Pushed image gcr.io/jlewi-dev/fairing-job/fairing-job:7935B6A7 in 3.5974774019996403s.\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"preprocessor.preprocess()\n",
|
||||
"\n",
|
||||
|
@ -570,9 +961,83 @@
|
|||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"execution_count": 28,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stderr",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"The job fairing-job-qg87g launched.\n",
|
||||
"Waiting for fairing-job-qg87g-chghc to start...\n",
|
||||
"Waiting for fairing-job-qg87g-chghc to start...\n",
|
||||
"Waiting for fairing-job-qg87g-chghc to start...\n",
|
||||
"Pod started running True\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"model_file not supplied; using the default\n",
|
||||
"model_file=mockup-model.dat\n",
|
||||
"[0]\tvalidation_0-rmse:154.15\n",
|
||||
"Will train until validation_0-rmse hasn't improved in 40 rounds.\n",
|
||||
"[1]\tvalidation_0-rmse:147.275\n",
|
||||
"[2]\tvalidation_0-rmse:140.414\n",
|
||||
"[3]\tvalidation_0-rmse:135.407\n",
|
||||
"[4]\tvalidation_0-rmse:131.662\n",
|
||||
"[5]\tvalidation_0-rmse:127.103\n",
|
||||
"[6]\tvalidation_0-rmse:123.558\n",
|
||||
"[7]\tvalidation_0-rmse:118.619\n",
|
||||
"[8]\tvalidation_0-rmse:115.743\n",
|
||||
"[9]\tvalidation_0-rmse:112.866\n",
|
||||
"[10]\tvalidation_0-rmse:110.533\n",
|
||||
"[11]\tvalidation_0-rmse:108.57\n",
|
||||
"[12]\tvalidation_0-rmse:107.407\n",
|
||||
"[13]\tvalidation_0-rmse:104.548\n",
|
||||
"[14]\tvalidation_0-rmse:102.625\n",
|
||||
"[15]\tvalidation_0-rmse:100.668\n",
|
||||
"[16]\tvalidation_0-rmse:99.4654\n",
|
||||
"[17]\tvalidation_0-rmse:98.1461\n",
|
||||
"[18]\tvalidation_0-rmse:96.71\n",
|
||||
"[19]\tvalidation_0-rmse:95.4135\n",
|
||||
"[20]\tvalidation_0-rmse:94.4105\n",
|
||||
"[21]\tvalidation_0-rmse:92.6454\n",
|
||||
"[22]\tvalidation_0-rmse:91.5752\n",
|
||||
"[23]\tvalidation_0-rmse:90.4496\n",
|
||||
"[24]\tvalidation_0-rmse:89.9257\n",
|
||||
"[25]\tvalidation_0-rmse:88.8438\n",
|
||||
"[26]\tvalidation_0-rmse:87.9895\n",
|
||||
"[27]\tvalidation_0-rmse:86.42\n",
|
||||
"[28]\tvalidation_0-rmse:85.2992\n",
|
||||
"[29]\tvalidation_0-rmse:84.6414\n",
|
||||
"[30]\tvalidation_0-rmse:84.3974\n",
|
||||
"[31]\tvalidation_0-rmse:83.2113\n",
|
||||
"[32]\tvalidation_0-rmse:82.5043\n",
|
||||
"[33]\tvalidation_0-rmse:81.3713\n",
|
||||
"[34]\tvalidation_0-rmse:81.2969\n",
|
||||
"[35]\tvalidation_0-rmse:79.9762\n",
|
||||
"[36]\tvalidation_0-rmse:79.084\n",
|
||||
"[37]\tvalidation_0-rmse:78.8726\n",
|
||||
"[38]\tvalidation_0-rmse:78.2066\n",
|
||||
"[39]\tvalidation_0-rmse:77.98\n",
|
||||
"[40]\tvalidation_0-rmse:76.8601\n",
|
||||
"[41]\tvalidation_0-rmse:76.3929\n",
|
||||
"[42]\tvalidation_0-rmse:76.0857\n",
|
||||
"[43]\tvalidation_0-rmse:75.4714\n",
|
||||
"[44]\tvalidation_0-rmse:74.4059\n",
|
||||
"[45]\tvalidation_0-rmse:73.5268\n",
|
||||
"[46]\tvalidation_0-rmse:73.0309\n",
|
||||
"[47]\tvalidation_0-rmse:72.4982\n",
|
||||
"[48]\tvalidation_0-rmse:71.9351\n",
|
||||
"[49]\tvalidation_0-rmse:71.3068\n",
|
||||
"mean_absolute_error=50.72\n",
|
||||
"Model export success: mockup-model.dat\n",
|
||||
"Best RMSE on eval: %.2f with %d rounds 71.306808 50\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"pod_spec = builder.generate_pod_spec()\n",
|
||||
"train_deployer = job.job.Job(cleanup=False,\n",
|
||||
|
@ -593,9 +1058,96 @@
|
|||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"execution_count": 29,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"apiVersion: v1\n",
|
||||
"items:\n",
|
||||
"- apiVersion: batch/v1\n",
|
||||
" kind: Job\n",
|
||||
" metadata:\n",
|
||||
" creationTimestamp: \"2019-10-25T01:48:20Z\"\n",
|
||||
" generateName: fairing-job-\n",
|
||||
" labels:\n",
|
||||
" fairing-deployer: job\n",
|
||||
" fairing-id: 85da7b32-f6c9-11e9-8e34-46c1cdc3ff41\n",
|
||||
" name: fairing-job-qg87g\n",
|
||||
" namespace: kubeflow-jlewi\n",
|
||||
" resourceVersion: \"625626\"\n",
|
||||
" selfLink: /apis/batch/v1/namespaces/kubeflow-jlewi/jobs/fairing-job-qg87g\n",
|
||||
" uid: 85df016a-f6c9-11e9-8cd6-42010a8e012b\n",
|
||||
" spec:\n",
|
||||
" backoffLimit: 0\n",
|
||||
" completions: 1\n",
|
||||
" parallelism: 1\n",
|
||||
" selector:\n",
|
||||
" matchLabels:\n",
|
||||
" controller-uid: 85df016a-f6c9-11e9-8cd6-42010a8e012b\n",
|
||||
" template:\n",
|
||||
" metadata:\n",
|
||||
" annotations:\n",
|
||||
" sidecar.istio.io/inject: \"false\"\n",
|
||||
" creationTimestamp: null\n",
|
||||
" labels:\n",
|
||||
" controller-uid: 85df016a-f6c9-11e9-8cd6-42010a8e012b\n",
|
||||
" fairing-deployer: job\n",
|
||||
" fairing-id: 85da7b32-f6c9-11e9-8e34-46c1cdc3ff41\n",
|
||||
" job-name: fairing-job-qg87g\n",
|
||||
" name: fairing-deployer\n",
|
||||
" spec:\n",
|
||||
" containers:\n",
|
||||
" - command:\n",
|
||||
" - python\n",
|
||||
" - /app/build-train-deploy.py\n",
|
||||
" - train\n",
|
||||
" env:\n",
|
||||
" - name: FAIRING_RUNTIME\n",
|
||||
" value: \"1\"\n",
|
||||
" - name: GOOGLE_APPLICATION_CREDENTIALS\n",
|
||||
" value: /etc/secrets/user-gcp-sa.json\n",
|
||||
" image: gcr.io/jlewi-dev/fairing-job/fairing-job:7935B6A7\n",
|
||||
" imagePullPolicy: IfNotPresent\n",
|
||||
" name: fairing-job\n",
|
||||
" resources: {}\n",
|
||||
" securityContext:\n",
|
||||
" runAsUser: 0\n",
|
||||
" terminationMessagePath: /dev/termination-log\n",
|
||||
" terminationMessagePolicy: File\n",
|
||||
" volumeMounts:\n",
|
||||
" - mountPath: /etc/secrets\n",
|
||||
" name: user-gcp-sa\n",
|
||||
" readOnly: true\n",
|
||||
" workingDir: /app/\n",
|
||||
" dnsPolicy: ClusterFirst\n",
|
||||
" restartPolicy: Never\n",
|
||||
" schedulerName: default-scheduler\n",
|
||||
" securityContext: {}\n",
|
||||
" terminationGracePeriodSeconds: 30\n",
|
||||
" volumes:\n",
|
||||
" - name: user-gcp-sa\n",
|
||||
" secret:\n",
|
||||
" defaultMode: 420\n",
|
||||
" secretName: user-gcp-sa\n",
|
||||
" status:\n",
|
||||
" completionTime: \"2019-10-25T01:48:29Z\"\n",
|
||||
" conditions:\n",
|
||||
" - lastProbeTime: \"2019-10-25T01:48:29Z\"\n",
|
||||
" lastTransitionTime: \"2019-10-25T01:48:29Z\"\n",
|
||||
" status: \"True\"\n",
|
||||
" type: Complete\n",
|
||||
" startTime: \"2019-10-25T01:48:20Z\"\n",
|
||||
" succeeded: 1\n",
|
||||
"kind: List\n",
|
||||
"metadata:\n",
|
||||
" resourceVersion: \"\"\n",
|
||||
" selfLink: \"\"\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"!kubectl get jobs -l fairing-id={train_deployer.job_id} -o yaml"
|
||||
]
|
||||
|
@ -620,9 +1172,17 @@
|
|||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"execution_count": 30,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stderr",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Cluster endpoint: http://fairing-service-2bhtr.kubeflow-jlewi.svc.cluster.local:5000/predict\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"from kubeflow.fairing.deployers import serving\n",
|
||||
"pod_spec = builder.generate_pod_spec()\n",
|
||||
|
@ -644,9 +1204,100 @@
|
|||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"execution_count": 31,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"apiVersion: extensions/v1beta1\n",
|
||||
"kind: Deployment\n",
|
||||
"metadata:\n",
|
||||
" annotations:\n",
|
||||
" deployment.kubernetes.io/revision: \"1\"\n",
|
||||
" creationTimestamp: \"2019-10-25T01:48:34Z\"\n",
|
||||
" generateName: fairing-deployer-\n",
|
||||
" generation: 1\n",
|
||||
" labels:\n",
|
||||
" app: mockup\n",
|
||||
" fairing-deployer: serving\n",
|
||||
" fairing-id: 8e428b7a-f6c9-11e9-8e34-46c1cdc3ff41\n",
|
||||
" name: fairing-deployer-cnv5x\n",
|
||||
" namespace: kubeflow-jlewi\n",
|
||||
" resourceVersion: \"625670\"\n",
|
||||
" selfLink: /apis/extensions/v1beta1/namespaces/kubeflow-jlewi/deployments/fairing-deployer-cnv5x\n",
|
||||
" uid: 8e43b5b8-f6c9-11e9-8cd6-42010a8e012b\n",
|
||||
"spec:\n",
|
||||
" progressDeadlineSeconds: 600\n",
|
||||
" replicas: 1\n",
|
||||
" revisionHistoryLimit: 10\n",
|
||||
" selector:\n",
|
||||
" matchLabels:\n",
|
||||
" app: mockup\n",
|
||||
" fairing-deployer: serving\n",
|
||||
" fairing-id: 8e428b7a-f6c9-11e9-8e34-46c1cdc3ff41\n",
|
||||
" strategy:\n",
|
||||
" rollingUpdate:\n",
|
||||
" maxSurge: 25%\n",
|
||||
" maxUnavailable: 25%\n",
|
||||
" type: RollingUpdate\n",
|
||||
" template:\n",
|
||||
" metadata:\n",
|
||||
" annotations:\n",
|
||||
" sidecar.istio.io/inject: \"false\"\n",
|
||||
" creationTimestamp: null\n",
|
||||
" labels:\n",
|
||||
" app: mockup\n",
|
||||
" fairing-deployer: serving\n",
|
||||
" fairing-id: 8e428b7a-f6c9-11e9-8e34-46c1cdc3ff41\n",
|
||||
" name: fairing-deployer\n",
|
||||
" spec:\n",
|
||||
" containers:\n",
|
||||
" - command:\n",
|
||||
" - seldon-core-microservice\n",
|
||||
" - build-train-deploy.ModelServe\n",
|
||||
" - REST\n",
|
||||
" - --service-type=MODEL\n",
|
||||
" - --persistence=0\n",
|
||||
" env:\n",
|
||||
" - name: FAIRING_RUNTIME\n",
|
||||
" value: \"1\"\n",
|
||||
" image: gcr.io/jlewi-dev/fairing-job/fairing-job:7935B6A7\n",
|
||||
" imagePullPolicy: IfNotPresent\n",
|
||||
" name: model\n",
|
||||
" resources: {}\n",
|
||||
" securityContext:\n",
|
||||
" runAsUser: 0\n",
|
||||
" terminationMessagePath: /dev/termination-log\n",
|
||||
" terminationMessagePolicy: File\n",
|
||||
" workingDir: /app/\n",
|
||||
" dnsPolicy: ClusterFirst\n",
|
||||
" restartPolicy: Always\n",
|
||||
" schedulerName: default-scheduler\n",
|
||||
" securityContext: {}\n",
|
||||
" terminationGracePeriodSeconds: 30\n",
|
||||
"status:\n",
|
||||
" conditions:\n",
|
||||
" - lastTransitionTime: \"2019-10-25T01:48:34Z\"\n",
|
||||
" lastUpdateTime: \"2019-10-25T01:48:34Z\"\n",
|
||||
" message: Deployment does not have minimum availability.\n",
|
||||
" reason: MinimumReplicasUnavailable\n",
|
||||
" status: \"False\"\n",
|
||||
" type: Available\n",
|
||||
" - lastTransitionTime: \"2019-10-25T01:48:34Z\"\n",
|
||||
" lastUpdateTime: \"2019-10-25T01:48:35Z\"\n",
|
||||
" message: ReplicaSet \"fairing-deployer-cnv5x-744dc89c56\" is progressing.\n",
|
||||
" reason: ReplicaSetUpdated\n",
|
||||
" status: \"True\"\n",
|
||||
" type: Progressing\n",
|
||||
" observedGeneration: 1\n",
|
||||
" replicas: 1\n",
|
||||
" unavailableReplicas: 1\n",
|
||||
" updatedReplicas: 1\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"!kubectl get deploy -o yaml {deployer.deployment.metadata.name}"
|
||||
]
|
||||
|
@ -664,7 +1315,7 @@
|
|||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"execution_count": 32,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
|
@ -673,9 +1324,21 @@
|
|||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"execution_count": 33,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"outputs": [
|
||||
{
|
||||
"ename": "NameError",
|
||||
"evalue": "name 'util' is not defined",
|
||||
"output_type": "error",
|
||||
"traceback": [
|
||||
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
|
||||
"\u001b[0;31mNameError\u001b[0m Traceback (most recent call last)",
|
||||
"\u001b[0;32m<ipython-input-33-9d29116b903d>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m\u001b[0m\n\u001b[1;32m 1\u001b[0m \u001b[0mfull_url\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0murl\u001b[0m \u001b[0;34m+\u001b[0m \u001b[0;34m\":5000/predict\"\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 2\u001b[0;31m \u001b[0mresult\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mutil\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mpredict_nparray\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mfull_url\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mtest_X\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 3\u001b[0m \u001b[0mpprint\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mpprint\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mresult\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mcontent\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
|
||||
"\u001b[0;31mNameError\u001b[0m: name 'util' is not defined"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"full_url = url + \":5000/predict\"\n",
|
||||
"result = util.predict_nparray(full_url, test_X)\n",
|
||||
|
@ -869,9 +1532,9 @@
|
|||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.6.7"
|
||||
"version": "3.6.8"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 2
|
||||
"nbformat_minor": 4
|
||||
}
|
||||
|
|
|
@ -0,0 +1,45 @@
|
|||
"""Some routines to setup the notebook.
|
||||
|
||||
This is separated out from util.py because this module installs some of the pip packages
|
||||
that util depends on.
|
||||
"""
|
||||
import sys
|
||||
import logging
|
||||
import os
|
||||
import subprocess
|
||||
|
||||
from pathlib import Path
|
||||
|
||||
KFP_PACKAGE = 'https://storage.googleapis.com/ml-pipeline/release/0.1.32/kfp.tar.gz'
|
||||
FAIRING_PACKAGE = 'git+git://github.com/kubeflow/fairing.git@7c93e888c3fc98bdf5fb0140e90f6407ce7a807b' # pylint: disable=line-too-long
|
||||
|
||||
def notebook_setup():
|
||||
# Install the SDK
|
||||
logging.basicConfig(format='%(message)s')
|
||||
logging.getLogger().setLevel(logging.INFO)
|
||||
|
||||
logging.info("pip installing requirements.txt")
|
||||
subprocess.check_call(["pip3", "install", "--user", "-r", "requirements.txt"])
|
||||
logging.info("pip installing KFP %s", KFP_PACKAGE)
|
||||
subprocess.check_call(["pip3", "install", "--user", KFP_PACKAGE, "--upgrade"])
|
||||
logging.info("pip installing fairing %s", FAIRING_PACKAGE)
|
||||
subprocess.check_call(["pip3", "install", "--user", FAIRING_PACKAGE])
|
||||
|
||||
logging.info("Configure docker credentials")
|
||||
subprocess.check_call(["gcloud", "auth", "configure-docker", "--quiet"])
|
||||
if os.getenv("GOOGLE_APPLICATION_CREDENTIALS"):
|
||||
logging.info("Activating service account")
|
||||
subprocess.check_call(["gcloud", "auth", "activate-service-account",
|
||||
"--key-file=" +
|
||||
os.getenv("GOOGLE_APPLICATION_CREDENTIALS"),
|
||||
"--quiet"])
|
||||
|
||||
home = str(Path.home())
|
||||
|
||||
# Installing the python packages locally doesn't appear to have them automatically
|
||||
# added to the path, so we need to add the directory manually
|
||||
local_py_path = os.path.join(home, ".local/lib/python3.6/site-packages")
|
||||
if local_py_path not in sys.path:
|
||||
logging.info("Adding %s to python path", local_py_path)
|
||||
# Insert at front because we want to override any installed packages
|
||||
sys.path.insert(0, local_py_path)
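A minimal usage sketch (an editorial illustration, not code from this commit): the notebook is expected to call notebook_setup() before importing util, because util depends on packages such as retrying and numpy that notebook_setup installs into user space (~/.local) and then puts on sys.path. The module and function names match the files in this change; the exact cell in build-train-deploy.ipynb may differ.

import notebook_setup

# Installs requirements.txt, KFP and fairing with `pip --user` and adds
# ~/.local/lib/python3.6/site-packages to the front of sys.path.
notebook_setup.notebook_setup()

# Import util only after the setup step, since util imports retrying and numpy.
import util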
|
|
@ -0,0 +1,39 @@
|
|||
import pytest
|
||||
|
||||
def pytest_addoption(parser):
|
||||
parser.addoption(
|
||||
"--name", help="Name for the job. If not specified one was created "
|
||||
"automatically", type=str, default="")
|
||||
parser.addoption(
|
||||
"--namespace", help="The namespace for the application", type=str,
|
||||
default="kubeflow-test-infra")
|
||||
parser.addoption(
|
||||
"--image", help="Notebook image to use", type=str,
|
||||
default="gcr.io/kubeflow-images-public/tensorflow-1.14.0-notebook-gpu"
|
||||
":v0.7.0")
|
||||
parser.addoption(
|
||||
"--repos", help="The repos to checkout; leave blank to use defaults",
|
||||
type=str, default="")
|
||||
parser.addoption(
|
||||
"--cluster", help="The cluster which the applition running in", type=str,
|
||||
default="")
|
||||
|
||||
@pytest.fixture
|
||||
def name(request):
|
||||
return request.config.getoption("--name")
|
||||
|
||||
@pytest.fixture
|
||||
def namespace(request):
|
||||
return request.config.getoption("--namespace")
|
||||
|
||||
@pytest.fixture
|
||||
def image(request):
|
||||
return request.config.getoption("--image")
|
||||
|
||||
@pytest.fixture
|
||||
def repos(request):
|
||||
return request.config.getoption("--repos")
|
||||
|
||||
@pytest.fixture
|
||||
def cluster(request):
|
||||
return request.config.getoption("--cluster")
|
|
@ -1,10 +1,15 @@
|
|||
apiVersion: batch/v1
|
||||
kind: Job
|
||||
metadata:
|
||||
name: $(job_name)
|
||||
name: xgboost-test
|
||||
spec:
|
||||
template:
|
||||
metadata:
|
||||
annotations:
|
||||
# TODO(jlewi): Do we really want to disable sidecar injection
|
||||
# in the test? Would it be better to use istio to mimic what happens
|
||||
# in notebooks?
|
||||
sidecar.istio.io/inject: "false"
|
||||
labels:
|
||||
app: xgboost-synthetics-testing
|
||||
spec:
|
||||
|
@ -17,8 +22,10 @@ spec:
|
|||
- /usr/local/bin/checkout_repos.sh
|
||||
- --repos=kubeflow/examples@$(CHECK_TAG)
|
||||
- --src_dir=/src
|
||||
# TODO(jlewi): Do we need to do depth all here?
|
||||
- --depth=all
|
||||
name: checkout
|
||||
# TODO(jlewi): Set in kustomization.yaml?
|
||||
image: gcr.io/kubeflow-ci/test-worker:v20190802-c6f9140-e3b0c4
|
||||
volumeMounts:
|
||||
- mountPath: /src
|
||||
|
@ -35,17 +42,9 @@ spec:
|
|||
command: ["python3", "execute_notebook.py"]
|
||||
workingDir: /src/kubeflow/examples/xgboost_synthetic/testing
|
||||
volumeMounts:
|
||||
- mountPath: /var/secrets
|
||||
name: user-gcp-sa
|
||||
readOnly: true
|
||||
- mountPath: /src
|
||||
name: src
|
||||
env:
|
||||
- name: GOOGLE_APPLICATION_CREDENTIALS
|
||||
value: /var/secrets/user-gcp-sa.json
|
||||
serviceAccount: default-editor
|
||||
volumes:
|
||||
- name: user-gcp-sa
|
||||
secret:
|
||||
secretName: user-gcp-sa
|
||||
- name: src
|
||||
emptyDir: {}
|
||||
|
|
|
@ -1,28 +0,0 @@
|
|||
apiVersion: kustomize.config.k8s.io/v1beta1
|
||||
kind: Kustomization
|
||||
|
||||
namespace: kubeflow
|
||||
|
||||
generatorOptions:
|
||||
disableNameSuffixHash: true
|
||||
|
||||
resources:
|
||||
- job.yaml
|
||||
- role.yaml
|
||||
- rolebinding.yaml
|
||||
|
||||
configurations:
|
||||
- params.yaml
|
||||
|
||||
configMapGenerator:
|
||||
- name: xgb-notebooks-tests
|
||||
literals:
|
||||
|
||||
vars:
|
||||
- fieldref:
|
||||
fieldPath: data.name
|
||||
name: job_name
|
||||
objref:
|
||||
apiVersion: v1
|
||||
kind: ConfigMap
|
||||
name: xgb-notebooks-tests
|
|
@ -1,3 +0,0 @@
|
|||
varReference:
|
||||
- path: metadata/name
|
||||
kind: Job
|
|
@ -1,14 +0,0 @@
|
|||
apiVersion: rbac.authorization.k8s.io/v1
|
||||
kind: RoleBinding
|
||||
metadata:
|
||||
labels:
|
||||
app: xgboost-synthetics-testing
|
||||
name: xgboost-synthetics-testing-role-binding
|
||||
roleRef:
|
||||
apiGroup: rbac.authorization.k8s.io
|
||||
kind: Role
|
||||
name: xgboost-synthetics-testing-role
|
||||
subjects:
|
||||
- kind: Group
|
||||
name: system:serviceaccounts
|
||||
apiGroup: rbac.authorization.k8s.io
|
|
@ -1,100 +1,125 @@
|
|||
import argparse
|
||||
import datetime
|
||||
import logging
|
||||
import os
|
||||
import subprocess
|
||||
import time
|
||||
import uuid
|
||||
import yaml
|
||||
|
||||
import pytest
|
||||
|
||||
from kubernetes import client as k8s_client
|
||||
from kubernetes.client import rest
|
||||
from kubeflow.testing import util
|
||||
|
||||
def create_job(args, app_dir): #pylint:disable=redefined-outer-name
|
||||
# TODO(jlewi): This test is currently failing because various things
|
||||
# need to be updated to work with 0.7.0. Until that's fixed we mark it
|
||||
# as expected to fail so we can begin to get signal.
|
||||
@pytest.mark.xfail
|
||||
def test_xgboost_synthetic(record_xml_attribute, name, namespace, cluster, # pylint: disable=too-many-branches,too-many-statements
|
||||
repos, image, app_dir):
|
||||
'''Generate the Job and submit it.'''
|
||||
util.run(['gcloud', 'auth', 'activate-service-account',
|
||||
"--key-file=/secret/gcp-credentials/key.json"], cwd=app_dir)
|
||||
util.run(['gcloud', '--project=kubeflow-ci-deployment', 'container',
|
||||
"clusters", "get-credentials", "--zone=us-east1-b", args.cluster], cwd=app_dir)
|
||||
|
||||
configmap = 'xgb-notebooks-tests'
|
||||
util.run(['kustomize', 'edit', 'add', 'configmap', configmap,
|
||||
'--from-literal=name=' + args.name], cwd=app_dir)
|
||||
# For presubmit, set the checkout tag as HEAD:$(PULL_NUMBER), others set to PULL_BASE_SHA
|
||||
if args.jobType == 'presubmit':
|
||||
util.run(['kustomize', 'edit', 'add', 'configmap', configmap,
|
||||
'--from-literal=checkTag=HEAD:' + args.pullNumber], cwd=app_dir)
|
||||
else:
|
||||
util.run(['kustomize', 'edit', 'add', 'configmap', configmap,
|
||||
'--from-literal=checkTag=' + args.pullBaseSHA], cwd=app_dir)
|
||||
util.run(['kustomize', 'edit', 'set', 'namespace', args.namespace], cwd=app_dir)
|
||||
util.run(['kustomize', 'edit', 'set', 'image', 'execute-image=' + args.image], cwd=app_dir)
|
||||
util.run(['kustomize', 'build', app_dir, '-o', 'generated.yaml'], cwd=app_dir)
|
||||
util.run(['kubectl', 'apply', '-f', 'generated.yaml'], cwd=app_dir)
|
||||
logging.info("Created job %s in namespaces %s", args.name, args.namespace)
|
||||
|
||||
def get_pod_logs(name, namespace, app_dir): #pylint:disable=redefined-outer-name
|
||||
'''Logs cannot be fetched via the k8s Python API here, so use kubectl to get them.'''
|
||||
logging.info("Getting pod %s logs...", name)
|
||||
util.run(['kubectl', 'logs', name, '-n', namespace], cwd=app_dir)
|
||||
|
||||
def check_job_status(namespace, app_dir): #pylint:disable=redefined-outer-name
|
||||
'''The job cannot be read via the k8s Python API here, so use kubectl to check its status.'''
|
||||
is_successed = False
|
||||
pod_info, pod_name, pod_status = '', '', ''
|
||||
for _ in range(0, 30):
|
||||
time.sleep(60)
|
||||
subCmd = "kubectl get pod -n " + namespace + " | grep -m1 xgboost-test"
|
||||
pod_info = subprocess.run(subCmd,
|
||||
stdout=subprocess.PIPE,
|
||||
stderr=subprocess.PIPE,
|
||||
shell=True,
|
||||
universal_newlines=True)
|
||||
if len(str(pod_info.stdout).split()) >= 2:
|
||||
pod_name = str(pod_info.stdout).split()[0]
|
||||
pod_status = str(pod_info.stdout).split()[2]
|
||||
|
||||
if pod_name:
|
||||
if pod_status == "Pending":
|
||||
logging.info("Pod %s is Pending.", pod_name)
|
||||
elif pod_status == "Running":
|
||||
logging.info("Pod %s is Running.", pod_name)
|
||||
elif pod_status == "Completed":
|
||||
logging.info("Pod %s is Completed.", pod_name)
|
||||
get_pod_logs(pod_name, namespace, app_dir)
|
||||
is_successed = True
|
||||
break
|
||||
elif pod_status == "Error":
|
||||
get_pod_logs(pod_name, namespace, app_dir)
|
||||
raise RuntimeError("Failed to execute notebook.")
|
||||
else:
|
||||
logging.warning("Pod %s status %s.", pod_name, pod_status)
|
||||
else:
|
||||
logging.warning("Cannot get the pod name, retry after 60 seconds.")
|
||||
|
||||
if not is_successed:
|
||||
raise RuntimeError("Timeout to get the executing notebook pod after 30 munites.")
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
|
||||
logging.basicConfig(level=logging.INFO)
|
||||
|
||||
parser = argparse.ArgumentParser()
|
||||
parser.add_argument(
|
||||
"--name", help="deploy application name", type=str, required=True)
|
||||
parser.add_argument(
|
||||
"--namespace", help="The namespace for the application", type=str, required=True)
|
||||
parser.add_argument(
|
||||
"--image", help="Image name for the application", type=str, required=True)
|
||||
parser.add_argument(
|
||||
"--pullNumber", help="The PR number", type=str, required=True)
|
||||
parser.add_argument(
|
||||
"--pullBaseSHA", help="The pull base SHA", type=str, required=True)
|
||||
parser.add_argument(
|
||||
"--jobType", help="The job type such as presubmit or postsubmit", type=str, required=True)
|
||||
parser.add_argument(
|
||||
"--cluster", help="The cluster which the applition running in", type=str, required=True)
|
||||
|
||||
app_dir = os.path.dirname(__file__)
|
||||
util.set_pytest_junit(record_xml_attribute, "test_xgboost_synthetic")
|
||||
app_dir = os.path.abspath(app_dir)
|
||||
|
||||
args = parser.parse_args()
|
||||
create_job(args, app_dir)
|
||||
check_job_status(args.namespace, app_dir)
|
||||
if os.getenv("GOOGLE_APPLICATION_CREDENTIALS"):
|
||||
util.run(['gcloud', 'auth', 'activate-service-account',
|
||||
"--key-file=" + os.getenv("GOOGLE_APPLICATION_CREDENTIALS")],
|
||||
cwd=app_dir)
|
||||
|
||||
# TODO(jlewi): We should just assume that kubeconfig has been set.
|
||||
if cluster:
|
||||
util.run(['gcloud', '--project=kubeflow-ci-deployment', 'container',
|
||||
"clusters", "get-credentials", "--zone=us-east1-b", cluster],
|
||||
cwd=app_dir)
|
||||
|
||||
with open("job.yaml") as hf:
|
||||
job = yaml.safe_load(hf)
|
||||
|
||||
# We need to check out the correct version of the code
|
||||
# in presubmits and postsubmits. We should check the environment variables
|
||||
# to get the appropriate values.
|
||||
# We should probably also only do that if the repos were not explicitly specified.
|
||||
# See
|
||||
# https://github.com/kubernetes/test-infra/blob/45246b09ed105698aa8fb928b7736d14480def29/prow/jobs.md#job-environment-variables
|
||||
if not repos:
|
||||
version = "@HEAD"
|
||||
if os.getenv("PULL_NUMBER"):
|
||||
version = "@{0}:{1}".format(os.getenv("PULL_PULL_SHA"),
|
||||
os.getenv("PULL_NUMBER"))
|
||||
|
||||
else:
|
||||
if os.getenv("PULL_BASE_SHA"):
|
||||
version = "@{0}".format(os.getenv("PULL_BASE_SHA"))
|
||||
|
||||
repos = "kubeflow/examples" + version
|
||||
|
||||
logging.info("Repos set to %s", repos)
|
||||
job["spec"]["template"]["spec"]["initContainers"][0]["command"] = [
|
||||
"/usr/local/bin/checkout_repos.sh",
|
||||
"--repos=" + repos,
|
||||
"--src_dir=/src",
|
||||
"--depth=all",
|
||||
]
|
||||
job["spec"]["template"]["spec"]["containers"][0]["image"] = image
|
||||
util.load_kube_config(persist_config=False)
|
||||
|
||||
if name:
|
||||
job["metadata"]["name"] = name
|
||||
else:
|
||||
job["metadata"]["name"] = ("xgboost-test-" +
|
||||
datetime.datetime.now().strftime("%H%M%S")
|
||||
+ "-" + uuid.uuid4().hex[0:3])
|
||||
|
||||
job["metadata"]["namespace"] = namespace
|
||||
|
||||
# Create an API client object to talk to the K8s master.
|
||||
api_client = k8s_client.ApiClient()
|
||||
batch_api = k8s_client.BatchV1Api(api_client)
|
||||
|
||||
logging.info("Creating job:\n%s", yaml.dump(job))
|
||||
batch_api.create_namespaced_job(job["metadata"]["namespace"], job)
|
||||
logging.info("Created job %s in namespaces %s", name, namespace)
|
||||
|
||||
# Wait for job.
|
||||
end_time = datetime.datetime.now() + datetime.timedelta(
|
||||
minutes=15)
|
||||
|
||||
last_condition = None
|
||||
while datetime.datetime.now() < end_time:
|
||||
try:
|
||||
job = batch_api.read_namespaced_job(name, namespace)
|
||||
except rest.ApiException as e:
|
||||
logging.error("There was a problem getting job %s.%s; %s",
|
||||
namespace, name, e)
|
||||
time.sleep(10)
|
||||
continue
|
||||
# ready_replicas could be None
|
||||
if not job.conditions:
|
||||
logging.info("Job missing condition")
|
||||
time.sleep(10)
|
||||
continue
|
||||
|
||||
last_condition = job.conditions[-1]
|
||||
if last_condition["type"] in ["Failed", "Complete"]:
|
||||
break
|
||||
logging.info("Waiting for job %s.%s", namespace, name)
|
||||
time.sleep(10)
|
||||
|
||||
logging.info("Final Job spec:\n%s", yaml.safe_dump(job))
|
||||
util.run(["kubectl", "describe", "job", "-n", namespace, name])
|
||||
|
||||
if not last_condition or last_condition["type"] not in ["Failed", "Complete"]:
|
||||
logging.error("Timeout waiting for job %s.%s to finish.", namespace, name)
|
||||
raise RuntimeError("Job {0}.{1} has last condition {2} which is not "
|
||||
"Complete".format(namespace, name,
|
||||
last_condition["type"] in ["Failed", "Complete"]))
|
||||
assert last_condition["type"] == "Complete"
|
||||
|
||||
if __name__ == "__main__":
|
||||
logging.basicConfig(level=logging.INFO,
|
||||
format=('%(levelname)s|%(asctime)s'
|
||||
'|%(pathname)s|%(lineno)d| %(message)s'),
|
||||
datefmt='%Y-%m-%dT%H:%M:%S',
|
||||
)
|
||||
logging.getLogger().setLevel(logging.INFO)
|
||||
pytest.main()
|
||||
|
|
|
@ -1,27 +1,11 @@
|
|||
import logging
|
||||
import os
|
||||
import shutil
|
||||
import subprocess
|
||||
import json
|
||||
import shutil
|
||||
import requests
|
||||
|
||||
from retrying import retry
|
||||
import numpy as np
|
||||
|
||||
KFP_PACKAGE = 'https://storage.googleapis.com/ml-pipeline/release/0.1.20/kfp.tar.gz'
|
||||
def notebook_setup():
|
||||
# Install the SDK
|
||||
|
||||
subprocess.check_call(["pip3", "install", "-r", "requirements.txt"])
|
||||
subprocess.check_call(["pip3", "install", KFP_PACKAGE, "--upgrade"])
|
||||
|
||||
logging.basicConfig(format='%(message)s')
|
||||
logging.getLogger().setLevel(logging.INFO)
|
||||
|
||||
subprocess.check_call(["gcloud", "auth", "configure-docker", "--quiet"])
|
||||
subprocess.check_call(["gcloud", "auth", "activate-service-account",
|
||||
"--key-file=" + os.getenv("GOOGLE_APPLICATION_CREDENTIALS"),
|
||||
"--quiet"])
|
||||
|
||||
def copy_data_to_nfs(nfs_path, model_dir):
|
||||
if not os.path.exists(nfs_path):
|
||||
shutil.copytree("ames_dataset", nfs_path)
|
||||
|
|