From 7e28cd6b23f9b3d76c242cb16391ccbf01896516 Mon Sep 17 00:00:00 2001
From: Jeremy Lewi
Date: Thu, 24 Oct 2019 19:53:38 -0700
Subject: [PATCH] Update xgboost_synthetic test infra; preliminary updates to work with 0.7.0 (#666)

* Update the xgboost_synthetic test infra to use pytest and py_func.
* Related to #655: update xgboost_synthetic to use workload identity.
* Related to #665: no signal about xgboost_synthetic.
* We need to update the xgboost_synthetic example to work with 0.7.0, e.g. workload identity.
* This PR focuses on updating the test infra and making some preliminary updates to the notebook.
* More fixes to the test and the notebook are probably needed to get it to actually pass.
* Update the job spec for 0.7; remove the secret and set the default service account.
* This is to make it work with workload identity.
* Instead of using kustomize to define the job that runs the notebook, we can just modify the YAML spec in Python.
* Use the Kubernetes Python API to create the job rather than shelling out.
* The notebook should do a 0.7-compatible check for credentials.
* We don't want to assume GOOGLE_APPLICATION_CREDENTIALS is set because we will be using workload identity.
* Take in repos as an argument, akin to what checkout_repos.sh requires.
* Convert xgboost_test.py to a pytest.
* This allows us to mark it as expected to fail so we can start to get signal without blocking.
* We also need to emit junit files so results show up in testgrid.
* Convert the jsonnet workflow for the E2E test to a python function that defines the workflow.
* Remove the old jsonnet workflow.
* Address comments.
* Fix issues with the notebook:
* Install pip packages in user space; the 0.7.0 images are based on TF images and have different permissions.
* Install a newer version of the fairing SDK that works with workload identity.
* Split pip-installing dependencies out of util.py and into notebook_setup.py; that's because util.py could depend on the packages being installed by notebook_setup.py.
* After pip installing the modules into user space, we need to add the local pip package path to the Python path; otherwise we get "import not found" errors.
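As a rough illustration of the "modify the YAML spec in Python and create the job through the Kubernetes API" approach described above, the test could do something like the sketch below. This is a hedged sketch, not the exact code in this PR: the helper name, the target namespace, and the "default-editor" service account are assumptions; only testing/job.yaml is a file that actually exists in this change.

```python
# Sketch: load the notebook-runner Job from job.yaml, point it at a
# workload-identity-bound service account, and create it with the Kubernetes
# Python client instead of shelling out to kubectl/kustomize.
import yaml
from kubernetes import client, config


def launch_notebook_job(namespace, job_yaml="testing/job.yaml"):
    # Use the local kubeconfig; inside a cluster use load_incluster_config().
    config.load_kube_config()

    with open(job_yaml) as f:
        job = yaml.safe_load(f)

    pod_spec = job["spec"]["template"]["spec"]
    # With workload identity there is no GOOGLE_APPLICATION_CREDENTIALS secret;
    # the pod just runs as a Kubernetes service account bound to a GCP one.
    pod_spec["serviceAccountName"] = "default-editor"  # assumed account name

    batch_api = client.BatchV1Api()
    return batch_api.create_namespaced_job(namespace, job)
```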
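The user-space install and Python path fix mentioned in the last bullet can be sketched roughly as follows. This is illustrative only, assuming a notebook_setup-style helper; the real notebook_setup.py added in this PR may differ in its details.

```python
# Sketch: install the example's requirements into the user site (the 0.7.0
# notebook images run as a non-root user) and make sure the user site-packages
# directory is on sys.path so freshly installed modules can be imported.
import site
import subprocess
import sys


def notebook_setup(requirements="requirements.txt"):
    # --user avoids needing root inside the notebook container.
    subprocess.check_call(
        [sys.executable, "-m", "pip", "install", "--user", "-r", requirements])

    # An already-running kernel may not have the user site dir on sys.path yet.
    user_site = site.getusersitepackages()
    if user_site not in sys.path:
        sys.path.append(user_site)
```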
--- .pylintrc | 2 +- prow_config.yaml | 10 +- py/README.md | 6 + py/kubeflow/__init__.py | 1 + py/kubeflow/examples/__init__.py | 0 py/kubeflow/examples/create_e2e_workflow.py | 389 +++++++++ .../components/xgboost_synthetic.jsonnet | 439 ---------- xgboost_synthetic/Dockerfile | 7 +- xgboost_synthetic/build-train-deploy.ipynb | 817 ++++++++++++++++-- xgboost_synthetic/notebook_setup.py | 45 + xgboost_synthetic/testing/conftest.py | 39 + xgboost_synthetic/testing/job.yaml | 19 +- xgboost_synthetic/testing/kustomization.yaml | 28 - xgboost_synthetic/testing/params.yaml | 3 - xgboost_synthetic/testing/rolebinding.yaml | 14 - xgboost_synthetic/testing/xgboost_test.py | 207 +++-- xgboost_synthetic/util.py | 20 +- 17 files changed, 1360 insertions(+), 686 deletions(-) create mode 100644 py/README.md create mode 100644 py/kubeflow/__init__.py create mode 100644 py/kubeflow/examples/__init__.py create mode 100644 py/kubeflow/examples/create_e2e_workflow.py delete mode 100644 test/workflows/components/xgboost_synthetic.jsonnet create mode 100644 xgboost_synthetic/notebook_setup.py create mode 100644 xgboost_synthetic/testing/conftest.py delete mode 100644 xgboost_synthetic/testing/kustomization.yaml delete mode 100644 xgboost_synthetic/testing/params.yaml delete mode 100644 xgboost_synthetic/testing/rolebinding.yaml diff --git a/.pylintrc b/.pylintrc index bd424053..e133caa2 100644 --- a/.pylintrc +++ b/.pylintrc @@ -56,7 +56,7 @@ confidence= # --enable=similarities". If you want to run only the classes checker, but have # no Warning level messages displayed, use"--disable=all --enable=classes # --disable=W" -disable=import-star-module-level,old-octal-literal,oct-method,print-statement,unpacking-in-except,parameter-unpacking,backtick,old-raise-syntax,old-ne-operator,long-suffix,dict-view-method,dict-iter-method,metaclass-assignment,next-method-called,raising-string,indexing-exception,raw_input-builtin,long-builtin,file-builtin,execfile-builtin,coerce-builtin,cmp-builtin,buffer-builtin,basestring-builtin,apply-builtin,filter-builtin-not-iterating,using-cmp-argument,useless-suppression,range-builtin-not-iterating,suppressed-message,missing-docstring,no-absolute-import,old-division,cmp-method,reload-builtin,zip-builtin-not-iterating,intern-builtin,unichr-builtin,reduce-builtin,standarderror-builtin,unicode-builtin,xrange-builtin,coerce-method,delslice-method,getslice-method,setslice-method,input-builtin,round-builtin,hex-method,nonzero-method,map-builtin-not-iterating,relative-import,invalid-name,bad-continuation,no-member,locally-disabled,fixme,import-error,too-many-locals 
+disable=import-star-module-level,old-octal-literal,oct-method,print-statement,unpacking-in-except,parameter-unpacking,backtick,old-raise-syntax,old-ne-operator,long-suffix,dict-view-method,dict-iter-method,metaclass-assignment,next-method-called,raising-string,indexing-exception,raw_input-builtin,long-builtin,file-builtin,execfile-builtin,coerce-builtin,cmp-builtin,buffer-builtin,basestring-builtin,apply-builtin,filter-builtin-not-iterating,using-cmp-argument,useless-suppression,range-builtin-not-iterating,suppressed-message,missing-docstring,no-absolute-import,old-division,cmp-method,reload-builtin,zip-builtin-not-iterating,intern-builtin,unichr-builtin,reduce-builtin,standarderror-builtin,unicode-builtin,xrange-builtin,coerce-method,delslice-method,getslice-method,setslice-method,input-builtin,round-builtin,hex-method,nonzero-method,map-builtin-not-iterating,relative-import,invalid-name,bad-continuation,no-member,locally-disabled,fixme,import-error,too-many-locals,no-name-in-module,too-many-instance-attributes,no-self-use [REPORTS] diff --git a/prow_config.yaml b/prow_config.yaml index 4384ce0d..49d15dce 100644 --- a/prow_config.yaml +++ b/prow_config.yaml @@ -1,5 +1,7 @@ # This file configures the workflows to trigger in our Prow jobs. # see kubeflow/testing/py/run_e2e_workflow.py +python_paths: + - kubeflow/examples/py workflows: - app_dir: kubeflow/examples/test/workflows component: workflows @@ -62,10 +64,10 @@ workflows: include_dirs: - pytorch_mnist/* - # E2E test for xgboost-synthetic - - app_dir: kubeflow/examples/test/workflows - component: xgboost_synthetic - name: xgboost2 + # E2E test for various notebooks + # New notebooks can just add a step to the workflow + - py_func: kubeflow.examples.create_e2e_workflow.create_workflow + name: notebooks job_types: - periodic - presubmit diff --git a/py/README.md b/py/README.md new file mode 100644 index 00000000..541f3b72 --- /dev/null +++ b/py/README.md @@ -0,0 +1,6 @@ +# Internal code for testing of examples + +This directory contains some python utilities reused for testing across +example. + +No actual examples are in this directory. diff --git a/py/kubeflow/__init__.py b/py/kubeflow/__init__.py new file mode 100644 index 00000000..69e3be50 --- /dev/null +++ b/py/kubeflow/__init__.py @@ -0,0 +1 @@ +__path__ = __import__('pkgutil').extend_path(__path__, __name__) diff --git a/py/kubeflow/examples/__init__.py b/py/kubeflow/examples/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/py/kubeflow/examples/create_e2e_workflow.py b/py/kubeflow/examples/create_e2e_workflow.py new file mode 100644 index 00000000..a4ad17b7 --- /dev/null +++ b/py/kubeflow/examples/create_e2e_workflow.py @@ -0,0 +1,389 @@ +""""Define the E2E workflow for kubeflow examples. + +Rapid iteration. + +Here are some pointers for rapidly iterating on the workflow during development. + +1. You can use the e2e_tool.py to directly launch the workflow on a K8s cluster. + If you don't have CLI access to the kubeflow-ci cluster (most folks) then + you would need to setup your own test cluster. + +2. Running with the E2E tool. + +export PYTHONPATH=${PYTHONPATH}:${KUBEFLOW_EXAMPLES}/py:${KUBEFLOW_TESTING_REPO}/py + +python -m kubeflow.testing.e2e_tool apply \ + kubeflow.examples.create_e2e_workflow.create_workflow + --name=${USER}-kfctl-test-$(date +%Y%m%d-%H%M%S) \ + --namespace=kubeflow-test-infra \ + --open-in-chrome=true + +To use code from a pull request set the prow envariables; e.g. 
+ +export JOB_NAME="jlewi-test" +export JOB_TYPE="presubmit" +export BUILD_ID=1234 +export PROW_JOB_ID=1234 +export REPO_OWNER=kubeflow +export REPO_NAME=kubeflow +export PULL_NUMBER=4148 +""" + +import logging +import os + +from kubeflow.testing import argo_build_util + +# The name of the NFS volume claim to use for test files. +NFS_VOLUME_CLAIM = "nfs-external" +# The name to use for the volume to use to contain test data +DATA_VOLUME = "kubeflow-test-volume" + +# This is the main dag with the entrypoint +E2E_DAG_NAME = "e2e" +EXIT_DAG_NAME = "exit-handler" + +# This is a sub dag containing the suite of tests to run against +# Kubeflow deployment +TESTS_DAG_NAME = "gke-tests" + +TEMPLATE_LABEL = "examples_e2e" + +MAIN_REPO = "kubeflow/examples" + +EXTRA_REPOS = ["kubeflow/testing@HEAD"] + +class Builder: + def __init__(self, name=None, namespace=None, test_target_name=None, + bucket=None, + **kwargs): # pylint: disable=unused-argument + """Initialize a builder. + + Args: + name: Name for the workflow. + namespace: Namespace for the workflow. + test_target_name: (Optional) Name to use as the test target to group + tests. + """ + self.name = name + self.namespace = namespace + # **************************************************************************** + # Define directory locations + # **************************************************************************** + # mount_path is the directory where the volume to store the test data + # should be mounted. + self.mount_path = "/mnt/" + "test-data-volume" + # test_dir is the root directory for all data for a particular test run. + self.test_dir = self.mount_path + "/" + self.name + # output_dir is the directory to sync to GCS to contain the output for this + # job. + self.output_dir = self.test_dir + "/output" + + # We prefix the artifacts directory with junit because + # that's what spyglass/prow requires. This ensures multiple + # instances of a workflow triggered by the same prow job + # don't end up clobbering each other + self.artifacts_dir = self.output_dir + "/artifacts/junit_{0}".format(name) + + # source directory where all repos should be checked out + self.src_root_dir = self.test_dir + "/src" + # The directory containing the kubeflow/examples repo + self.src_dir = self.src_root_dir + "/kubeflow/examples" + + # Top level directories for python code + self.kubeflow_py = self.src_dir + + # The directory within the kubeflow_testing submodule containing + # py scripts to use. + self.kubeflow_testing_py = self.src_root_dir + "/kubeflow/testing/py" + + # The class name to label junit files. + # We want to be able to group related tests in test grid. + # Test grid allows grouping by target which corresponds to the classname + # attribute in junit files. + # So we set an environment variable to the desired class name. + # The pytest modules can then look at this environment variable to + # explicitly override the classname. 
+ # The classname should be unique for each run so it should take into + # account the different parameters + self.test_target_name = test_target_name + + self.bucket = bucket + self.workflow = None + + def _build_workflow(self): + """Create the scaffolding for the Argo workflow""" + workflow = { + "apiVersion": "argoproj.io/v1alpha1", + "kind": "Workflow", + "metadata": { + "name": self.name, + "namespace": self.namespace, + "labels": argo_build_util.add_dicts([{ + "workflow": self.name, + "workflow_template": TEMPLATE_LABEL, + }, argo_build_util.get_prow_labels()]), + }, + "spec": { + "entrypoint": E2E_DAG_NAME, + # Have argo garbage collect old workflows otherwise we overload the API + # server. + "ttlSecondsAfterFinished": 7 * 24 * 60 * 60, + "volumes": [ + { + "name": "gcp-credentials", + "secret": { + "secretName": "kubeflow-testing-credentials", + }, + }, + { + "name": DATA_VOLUME, + "persistentVolumeClaim": { + "claimName": NFS_VOLUME_CLAIM, + }, + }, + ], + "onExit": EXIT_DAG_NAME, + "templates": [ + { + "dag": { + "tasks": [], + }, + "name": E2E_DAG_NAME, + }, + { + "dag": { + "tasks": [], + }, + "name": TESTS_DAG_NAME, + + }, + { + "dag": { + "tasks": [], + }, + "name": EXIT_DAG_NAME, + } + ], + }, # spec + } # workflow + + return workflow + + def _build_task_template(self): + """Return a template for all the tasks""" + + task_template = {'activeDeadlineSeconds': 3000, + 'container': {'command': [], + 'env': [ + {"name": "GOOGLE_APPLICATION_CREDENTIALS", + "value": "/secret/gcp-credentials/key.json"}, + {"name": "TEST_TARGET_NAME", + "value": self.test_target_name}, + ], + 'image': 'gcr.io/kubeflow-ci/test-worker:latest', + 'imagePullPolicy': 'Always', + 'name': '', + 'resources': {'limits': {'cpu': '4', 'memory': '4Gi'}, + 'requests': {'cpu': '1', 'memory': '1536Mi'}}, + 'volumeMounts': [{'mountPath': '/mnt/test-data-volume', + 'name': 'kubeflow-test-volume'}, + {'mountPath': + '/secret/gcp-credentials', + 'name': 'gcp-credentials'}]}, + 'metadata': {'labels': { + 'workflow_template': TEMPLATE_LABEL}}, + 'outputs': {}} + + # Define common environment variables to be added to all steps + common_env = [ + {'name': 'PYTHONPATH', + 'value': ":".join([self.kubeflow_py, self.kubeflow_py + "/py", + self.kubeflow_testing_py,])}, + {'name': 'KUBECONFIG', + 'value': os.path.join(self.test_dir, 'kfctl_test/.kube/kubeconfig')}, + ] + + task_template["container"]["env"].extend(common_env) + + task_template = argo_build_util.add_prow_env(task_template) + + return task_template + + def _build_step(self, name, workflow, dag_name, task_template, + command, dependencies): + """Syntactic sugar to add a step to the workflow""" + + step = argo_build_util.deep_copy(task_template) + + step["name"] = name + step["container"]["command"] = command + + argo_build_util.add_task_to_dag(workflow, dag_name, step, dependencies) + + # Return the newly created template; add_task_to_dag makes a copy of the template + # So we need to fetch it from the workflow spec. + for t in workflow["spec"]["templates"]: + if t["name"] == name: + return t + + return None + + def _build_tests_dag(self): + """Build the dag for the set of tests to run against a KF deployment.""" + + task_template = self._build_task_template() + + # *************************************************************************** + # Test xgboost + step_name = "xgboost-synthetic" + command = ["pytest", "xgboost_test.py", + # I think -s mean stdout/stderr will print out to aid in debugging. 
+ # Failures still appear to be captured and stored in the junit file. + "-s", + # Increase the log level so that info level log statements show up. + "--log-cli-level=info", + # Test timeout in seconds. + "--timeout=1800", + "--junitxml=" + self.artifacts_dir + "/junit_xgboost-synthetic-test.xml", + ] + + dependencies = [] + xgboost_step = self._build_step(step_name, self.workflow, TESTS_DAG_NAME, task_template, + command, dependencies) + xgboost_step["container"]["workingDir"] = os.path.join(self.src_dir, + "xgboost_synthetic", + "testing") + + + def _build_exit_dag(self): + """Build the exit handler dag""" + task_template = self._build_task_template() + + # *********************************************************************** + # Copy artifacts + step_name = "copy-artifacts" + command = ["python", + "-m", + "kubeflow.testing.prow_artifacts", + "--artifacts_dir=" + + self.output_dir, + "copy_artifacts"] + + if self.bucket: + command.append("--bucket=" + self.bucket) + + dependencies = [] + + copy_artifacts = self._build_step(step_name, self.workflow, EXIT_DAG_NAME, task_template, + command, dependencies) + + # TODO(jlewi): We may need to run this with retries kubeflow/kubeflow + # has a python script run with retries; we might want to move that + # over to kubeflow.testing and use it. + step_name = "test-dir-delete" + command = ["rm", + "-rf", + self.test_dir, ] + dependencies = [copy_artifacts["name"]] + copy_artifacts = self._build_step(step_name, self.workflow, EXIT_DAG_NAME, task_template, + command, dependencies) + + # We don't want to run from the directory we are trying to delete. + copy_artifacts["container"]["workingDir"] = "/" + + def build(self): + self.workflow = self._build_workflow() + task_template = self._build_task_template() + + # ************************************************************************** + # Checkout + + # create the checkout step + main_repo = argo_build_util.get_repo_from_prow_env() + if not main_repo: + logging.info("Prow environment variables for repo not set") + main_repo = MAIN_REPO + "@HEAD" + logging.info("Main repository: %s", main_repo) + repos = [main_repo] + + repos.extend(EXTRA_REPOS) + + #*************************************************************************** + # Checkout the code + checkout = argo_build_util.deep_copy(task_template) + + checkout["name"] = "checkout" + checkout["container"]["command"] = ["/usr/local/bin/checkout_repos.sh", + "--repos=" + ",".join(repos), + "--src_dir=" + self.src_root_dir] + + argo_build_util.add_task_to_dag(self.workflow, E2E_DAG_NAME, checkout, []) + + #*************************************************************************** + # Get credentials for the latest auto-deployed cluster + + credentials = argo_build_util.deep_copy(task_template) + + credentials["name"] = "get-credentials" + credentials["container"]["command"] = ["python3", + "-m", + "kubeflow.testing." 
+ "get_kf_testing_cluster", + "get-credentials", + ] + + dependencies = [checkout["name"]] + argo_build_util.add_task_to_dag(self.workflow, E2E_DAG_NAME, credentials, + dependencies) + + #************************************************************************** + # Run a dag of tests + self._build_tests_dag() + + # Add a task to run the dag + dependencies = [credentials["name"]] + argo_build_util.add_task_only_to_dag(self.workflow, E2E_DAG_NAME, + TESTS_DAG_NAME, + TESTS_DAG_NAME, + dependencies) + + # ************************************************************************** + # create_pr_symlink + # *************************************************************************** + # TODO(jlewi): run_e2e_workflow.py should probably create the PR symlink + step_name = "create-pr-symlink" + command = ["python", + "-m", + "kubeflow.testing.prow_artifacts", + "--artifacts_dir=" + self.output_dir, + "create_pr_symlink"] + + if self.bucket: + command.append(self.bucket) + + dependencies = [checkout["name"]] + self._build_step(step_name, self.workflow, E2E_DAG_NAME, task_template, + command, dependencies) + + self._build_exit_dag() + + # Set the labels on all templates + self.workflow = argo_build_util.set_task_template_labels(self.workflow) + + return self.workflow + +# TODO(jlewi): This is an unnecessary layer of indirection around the builder +# We should allow py_func in prow_config to point to the builder and +# let e2e_tool take care of this. +def create_workflow(**kwargs): # pylint: disable=too-many-statements + """Create workflow returns an Argo workflow to test kfctl upgrades. + + Args: + name: Name to give to the workflow. This can also be used to name things + associated with the workflow. + """ + + builder = Builder(**kwargs) + + return builder.build() diff --git a/test/workflows/components/xgboost_synthetic.jsonnet b/test/workflows/components/xgboost_synthetic.jsonnet deleted file mode 100644 index d38ea69f..00000000 --- a/test/workflows/components/xgboost_synthetic.jsonnet +++ /dev/null @@ -1,439 +0,0 @@ -// Test workflow for XGBoost Housing example. -// -local env = std.extVar("__ksonnet/environments"); -local overrides = std.extVar("__ksonnet/params").components.xgboost_synthetic; - -local k = import "k.libsonnet"; -local util = import "util.libsonnet"; - -// Define default params and then combine them with any overrides -local defaultParams = { - // local nfsVolumeClaim: "kubeflow-testing", - nfsVolumeClaim: "nfs-external", - - // The name to use for the volume to use to contain test data. - dataVolume: "kubeflow-test-volume", - - // Default step image: - stepImage: "gcr.io/kubeflow-ci/test-worker:v20190802-c6f9140-e3b0c4", - - // Which Kubeflow cluster to use for running PytorchJobs on. - kfProject: "kubeflow-ci-deployment", - kfZone: "us-east1-b", - kfCluster: "kf-vmaster-n00", - - // The bucket where the model should be written - // This needs to be writable by the GCP service account in the Kubeflow cluster (not the test cluster) - modelBucket: "kubeflow-ci_temp", - - // Whether to delete the namespace at the end. - // Leaving the namespace around can be useful for debugging. - // - // TODO(jlewi): We should consider running a cronjob to GC namespaces. - // But if we leave namespaces up; then we end up leaving the servers up which - // uses up CPU. - // - deleteNamespace: true, -}; - -local params = defaultParams + overrides; - -local prowEnv = util.parseEnv(params.prow_env); - -// Create a dictionary of the different prow variables so we can refer to them in the workflow. 
-// -// Important: We want to initialize all variables we reference to some value. If we don't -// and we reference a variable which doesn't get set then we get very hard to debug failure messages. -// In particular, we've seen problems where if we add a new environment and evaluate one component eg. "workflows" -// and another component e.g "code_search.jsonnet" doesn't have a default value for BUILD_ID then ksonnet -// fails because BUILD_ID is undefined. -local prowDict = { - BUILD_ID: "notset", - BUILD_NUMBER: "notset", - REPO_OWNER: "notset", - REPO_NAME: "notset", - JOB_NAME: "notset", - JOB_TYPE: "notset", - PULL_NUMBER: "notset", - PULL_BASE_SHA: "notset", - } + util.listOfDictToMap(prowEnv); - -local bucket = params.bucket; - -// mountPath is the directory where the volume to store the test data -// should be mounted. -local mountPath = "/mnt/" + "test-data-volume"; -// testDir is the root directory for all data for a particular test run. -local testDir = mountPath + "/" + params.name; -// outputDir is the directory to sync to GCS to contain the output for this job. -local outputDir = testDir + "/output"; -local artifactsDir = outputDir + "/artifacts"; - -// Source directory where all repos should be checked out -local srcRootDir = testDir + "/src"; - -// The directory containing the kubeflow/kubeflow repo -local srcDir = srcRootDir + "/" + prowDict.REPO_OWNER + "/" + prowDict.REPO_NAME; - -// These variables control where the docker images get pushed and what -// tag to use -local executeImage = "gcr.io/kubeflow-images-public/tensorflow-1.13.1-notebook-cpu:v0.5.0"; - -// value of KUBECONFIG environment variable. This should be a full path. -local kubeConfig = testDir + "/.kube/kubeconfig"; - -// Namespace where tests should run -local testNamespace = "xgboost-synthetic-" + prowDict["BUILD_ID"]; - -// The directory within the kubeflow_testing submodule containing -// py scripts to use. -local kubeflowTestingPy = srcRootDir + "/kubeflow/testing/py"; - -// Workflow template is the name of the workflow template; typically the name of the ks component. -// This is used as a label to make it easy to identify all Argo workflows created from a given -// template. -local workflow_template = "xgboost_synthetic"; - -// Build template is a template for constructing Argo step templates. -// -// step_name: Name for the template -// command: List to pass as the container command. -// -// We customize the defaults for each step in the workflow by modifying -// buildTemplate.argoTemplate -local buildTemplate = { - // name & command variables should be overwritten for every test. - // Other variables can be changed per step as needed. - // They are hidden because they shouldn't be included in the Argo template - name: "", - command:: "", - image: params.stepImage, - workingDir:: null, - env_vars:: [], - side_cars: [], - pythonPath: kubeflowTestingPy, - - activeDeadlineSeconds: 1800, // Set 30 minute timeout for each template - - local template = self, - - // Actual template for Argo - argoTemplate: { - name: template.name, - metadata: { - labels: prowDict + { - workflow: params.name, - workflow_template: workflow_template, - step_name: template.name, - }, - }, - container: { - command: template.command, - name: template.name, - image: template.image, - workingDir: template.workingDir, - env: [ - { - // Add the source directories to the python path. 
- name: "PYTHONPATH", - value: template.pythonPath, - }, - { - name: "GOOGLE_APPLICATION_CREDENTIALS", - value: "/secret/gcp-credentials/key.json", - }, - { - name: "GITHUB_TOKEN", - valueFrom: { - secretKeyRef: { - name: "github-token", - key: "github_token", - }, - }, - }, - { - // We use a directory in our NFS share to store our kube config. - // This way we can configure it on a single step and reuse it on subsequent steps. - name: "KUBECONFIG", - value: kubeConfig, - }, - ] + prowEnv + template.env_vars, - volumeMounts: [ - { - name: params.dataVolume, - mountPath: mountPath, - }, - { - name: "github-token", - mountPath: "/secret/github-token", - }, - { - name: "gcp-credentials", - mountPath: "/secret/gcp-credentials", - }, - ], - }, - }, -}; // buildTemplate - - -// Create a list of dictionary. -// Each item is a dictionary describing one step in the graph. -local dagTemplates = [ - { - template: buildTemplate { - name: "checkout", - command: - ["/usr/local/bin/checkout.sh", srcRootDir], - - env_vars: [{ - name: "EXTRA_REPOS", - // TODO(jlewi): Pin to commit on master when #281 is checked in. - value: "kubeflow/testing@HEAD:281", - }], - }, - dependencies: null, - }, // checkout - { - // TODO(https://github.com/kubeflow/testing/issues/257): Create-pr-symlink - // should be done by run_e2e_workflow.py - template: buildTemplate { - name: "create-pr-symlink", - command: [ - "python", - "-m", - "kubeflow.testing.prow_artifacts", - "--artifacts_dir=" + outputDir, - "create_pr_symlink", - "--bucket=" + params.bucket, - ], - }, // create-pr-symlink - dependencies: ["checkout"], - }, // create-pr-symlink - { - // Configure KUBECONFIG - template: buildTemplate { - name: "get-kubeconfig", - command: util.buildCommand([ - [ - "gcloud", - "auth", - "activate-service-account", - "--key-file=${GOOGLE_APPLICATION_CREDENTIALS}", - ], - [ - "gcloud", - "--project=" + params.kfProject, - "container", - "clusters", - "get-credentials", - "--zone=" + params.kfZone, - params.kfCluster, - ]] - ), - }, - dependencies: ["checkout"], - }, // get-kubeconfig - { - // Create the namespace - // TODO(jlewi): We should add some sort of retry. 
- template: buildTemplate { - name: "create-namespace", - command: util.buildCommand([ - [ - "echo", - "KUBECONFIG=", - "${KUBECONFIG}", - ], - [ - "gcloud", - "auth", - "activate-service-account", - "--key-file=${GOOGLE_APPLICATION_CREDENTIALS}", - ], - [ - "kubectl", - "config" , - "current-context", - ], - [ - "kubectl", - "create", - "namespace", - testNamespace, - ], - # Copy the GCP secret from the kubeflow namespace to the test namespace - [ - srcDir + "/test/copy_secret.sh", - "kubeflow", - testNamespace, - "user-gcp-sa", - ]] - ), - }, - dependencies: ["get-kubeconfig"], - }, // create-namespace - { - template: buildTemplate { - name: "execute-notebook", - command: [ - "python3", - "xgboost_test.py", - "--name=" + "xgboost-test-" + prowDict["BUILD_ID"], - "--namespace=" + testNamespace, - "--image=" + executeImage, - "--jobType=" + prowDict["JOB_TYPE"], - "--pullNumber=" + prowDict["PULL_NUMBER"], - "--pullBaseSHA=" + prowDict["PULL_BASE_SHA"], - "--cluster=" + params.kfCluster, - ], - pythonPath: kubeflowTestingPy, - workingDir: srcDir + "/xgboost_synthetic/testing", - }, - dependencies: ["create-namespace"], - }, // execute-notebook -]; - -// Dag defines the tasks in the graph -local dag = { - name: "e2e", - // Construct tasks from the templates - // we will give the steps the same name as the template - dag: { - tasks: util.toArgoTaskList(dagTemplates), - }, -}; // dag - -// Define templates for the steps to be performed when the -// test exits - -local deleteTemplates = if params.deleteNamespace then - [ - { - // Delete the namespace - // TODO(jlewi): We should add some sort of retry. - template: buildTemplate { - name: "delete-namespace", - command: util.buildCommand([ - [ - "gcloud", - "auth", - "activate-service-account", - "--key-file=${GOOGLE_APPLICATION_CREDENTIALS}", - ], - [ - "kubectl", - "delete", - "namespace", - testNamespace, - ]] - ), - }, - }, // delete-namespace - ] else []; - -local exitTemplates = - deleteTemplates + - [ - { - // Copy artifacts to GCS for gubernator. - // TODO(https://github.com/kubeflow/testing/issues/257): Create-pr-symlink - // should be done by run_e2e_workflow.py - template: buildTemplate { - name: "copy-artifacts", - command: [ - "python", - "-m", - "kubeflow.testing.prow_artifacts", - "--artifacts_dir=" + outputDir, - "copy_artifacts", - "--bucket=" + bucket, - ], - }, // copy-artifacts, - }, - { - // Delete the test directory in NFS. - // TODO(https://github.com/kubeflow/testing/issues/256): Use an external process to do this. - template: - buildTemplate { - name: "test-dir-delete", - command: [ - "rm", - "-rf", - testDir, - ], - - argoTemplate+: { - retryStrategy: { - limit: 3, - }, - }, - }, // test-dir-delete - dependencies: ["copy-artifacts"] + if params.deleteNamespace then ["delete-namespace"] else [], - }, - ]; - -// Create a DAG representing the set of steps to execute on exit -local exitDag = { - name: "exit-handler", - // Construct tasks from the templates - // we will give the steps the same name as the template - dag: { - tasks: util.toArgoTaskList(exitTemplates), - }, -}; - -// A list of templates for the actual steps -local stepTemplates = std.map(function(i) i.template.argoTemplate - , dagTemplates) + - std.map(function(i) i.template.argoTemplate - , exitTemplates); - -// Define the Argo Workflow. 
-local workflow = { - apiVersion: "argoproj.io/v1alpha1", - kind: "Workflow", - metadata: { - name: params.name, - namespace: env.namespace, - labels: prowDict + { - workflow: params.name, - workflow_template: workflow_template, - }, - }, - spec: { - entrypoint: "e2e", - // Have argo garbage collect old workflows otherwise we overload the API server. - ttlSecondsAfterFinished: 7 * 24 * 60 * 60, - volumes: [ - { - name: "github-token", - secret: { - secretName: "github-token", - }, - }, - { - name: "gcp-credentials", - secret: { - secretName: "kubeflow-testing-credentials", - }, - }, - { - name: params.dataVolume, - persistentVolumeClaim: { - claimName: params.nfsVolumeClaim, - }, - }, - ], // volumes - - // onExit specifies the template that should always run when the workflow completes. - onExit: "exit-handler", - - // The templates will be a combination of the templates - // defining the dags executed by Argo as well as the templates - // for the individual steps. - templates: [dag, exitDag] + stepTemplates, // templates - }, // spec -}; // workflow - -std.prune(k.core.v1.list.new([workflow])) diff --git a/xgboost_synthetic/Dockerfile b/xgboost_synthetic/Dockerfile index 06a0ca7e..944b9fd2 100644 --- a/xgboost_synthetic/Dockerfile +++ b/xgboost_synthetic/Dockerfile @@ -3,7 +3,12 @@ # This docker image is based on existing notebook image # It also includes the dependencies required for training and deploying # this way we can use it as the base image -FROM gcr.io/kubeflow-images-public/tensorflow-1.13.1-notebook-cpu:v0.5.0 +# Note: when using ClusterBuilder I believe the base image gets over written +FROM gcr.io/kubeflow-images-public/tensorflow-1.14.0-notebook-cpu:v0.7.0 COPY requirements.txt . + +# We want to install the requirements in the system directory so we need to switch to root +USER root RUN pip3 --no-cache-dir install -r requirements.txt +USER jovyan diff --git a/xgboost_synthetic/build-train-deploy.ipynb b/xgboost_synthetic/build-train-deploy.ipynb index 7ae63f2e..a0b3a83b 100644 --- a/xgboost_synthetic/build-train-deploy.ipynb +++ b/xgboost_synthetic/build-train-deploy.ipynb @@ -29,20 +29,18 @@ "source": [ "### Verify we have a GCP account\n", "\n", - "* The cell below checks that this notebook was spawned with credentials to access GCP\n", - "* To add credentials when you created the notebook you should have selected add gcp credential as shown below\n", - " ![add credential](images/addgcpsecret.png)\n" + "* The cell below checks that this notebook was spawned with credentials to access GCP" ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 1, "metadata": {}, "outputs": [], "source": [ "import os\n", - "if not os.getenv(\"GOOGLE_APPLICATION_CREDENTIALS\"):\n", - " raise ValueError(\"Notebook is missing google application credentials\")" + "from oauth2client.client import GoogleCredentials\n", + "credentials = GoogleCredentials.get_application_default()" ] }, { @@ -56,49 +54,23 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 2, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "pip installing requirements.txt\n", + "pip installing KFP https://storage.googleapis.com/ml-pipeline/release/0.1.32/kfp.tar.gz\n", + "pip installing fairing git+git://github.com/kubeflow/fairing.git@7c93e888c3fc98bdf5fb0140e90f6407ce7a807b\n", + "Configure docker credentials\n" + ] + } + ], "source": [ - "!pip3 install retrying" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, 
- "source": [ - "* Install a specific version of kubeflow-fairing that this example is tested against" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "!pip3 install git+git://github.com/kubeflow/fairing.git@b3db9a548b51eea93250c662defe6470283943b3" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "* Perform some notebook setup" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "scrolled": false - }, - "outputs": [], - "source": [ - "import util\n", - "from pathlib import Path\n", - "import os\n", - "\n", - "util.notebook_setup()" + "import notebook_setup\n", + "notebook_setup.notebook_setup()" ] }, { @@ -111,7 +83,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 3, "metadata": {}, "outputs": [], "source": [ @@ -142,11 +114,12 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 4, "metadata": {}, "outputs": [], "source": [ "# Imports not to be included in the built docker image\n", + "import util\n", "import kfp\n", "import kfp.components as comp\n", "import kfp.gcp as gcp\n", @@ -171,7 +144,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 5, "metadata": {}, "outputs": [], "source": [ @@ -194,7 +167,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 6, "metadata": {}, "outputs": [], "source": [ @@ -267,7 +240,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 7, "metadata": {}, "outputs": [], "source": [ @@ -384,9 +357,90 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 8, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "model_file=mockup-model.dat\n", + "[00:34:17] WARNING: /workspace/src/objective/regression_obj.cu:152: reg:linear is now deprecated in favor of reg:squarederror.\n", + "[0]\tvalidation_0-rmse:162.856\n", + "Will train until validation_0-rmse hasn't improved in 40 rounds.\n", + "[1]\tvalidation_0-rmse:156.25\n", + "[2]\tvalidation_0-rmse:150.238\n", + "[3]\tvalidation_0-rmse:145.026\n", + "[4]\tvalidation_0-rmse:138.321\n", + "[5]\tvalidation_0-rmse:131.554\n", + "[6]\tvalidation_0-rmse:127.809\n", + "[7]\tvalidation_0-rmse:122.574\n", + "[8]\tvalidation_0-rmse:117.394\n", + "[9]\tvalidation_0-rmse:114.842\n", + "[10]\tvalidation_0-rmse:111.601\n", + "[11]\tvalidation_0-rmse:108.426\n", + "[12]\tvalidation_0-rmse:105.283\n", + "[13]\tvalidation_0-rmse:102.916\n", + "[14]\tvalidation_0-rmse:101.126\n", + "[15]\tvalidation_0-rmse:98.9049\n", + "[16]\tvalidation_0-rmse:96.6027\n", + "[17]\tvalidation_0-rmse:94.6449\n", + "[18]\tvalidation_0-rmse:92.7175\n", + "[19]\tvalidation_0-rmse:89.821\n", + "[20]\tvalidation_0-rmse:87.785\n", + "[21]\tvalidation_0-rmse:85.8316\n", + "[22]\tvalidation_0-rmse:84.7495\n", + "[23]\tvalidation_0-rmse:83.3638\n", + "[24]\tvalidation_0-rmse:81.9553\n", + "[25]\tvalidation_0-rmse:80.1649\n", + "[26]\tvalidation_0-rmse:79.2545\n", + "[27]\tvalidation_0-rmse:77.5626\n", + "[28]\tvalidation_0-rmse:75.979\n", + "[29]\tvalidation_0-rmse:74.6956\n", + "[30]\tvalidation_0-rmse:74.1145\n", + "[31]\tvalidation_0-rmse:73.102\n", + "[32]\tvalidation_0-rmse:71.9953\n", + "[33]\tvalidation_0-rmse:71.2614\n", + "[34]\tvalidation_0-rmse:70.4738\n", + "[35]\tvalidation_0-rmse:69.6975\n", + "[36]\tvalidation_0-rmse:69.0899\n", + "[37]\tvalidation_0-rmse:68.6369\n", + "[38]\tvalidation_0-rmse:67.6392\n", + 
"[39]\tvalidation_0-rmse:67.153\n", + "[40]\tvalidation_0-rmse:66.8115\n", + "[41]\tvalidation_0-rmse:66.2017\n", + "[42]\tvalidation_0-rmse:65.5889\n", + "[43]\tvalidation_0-rmse:64.793\n", + "[44]\tvalidation_0-rmse:64.2622\n", + "[45]\tvalidation_0-rmse:63.75\n", + "[46]\tvalidation_0-rmse:63.0683\n", + "[47]\tvalidation_0-rmse:62.5844\n", + "[48]\tvalidation_0-rmse:62.4817\n", + "[49]\tvalidation_0-rmse:61.9615\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "mean_absolute_error=47.50\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Best RMSE on eval: %.2f with %d rounds 61.961517 50\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "Model export success: mockup-model.dat\n" + ] + } + ], "source": [ "model = ModelServe(model_file=\"mockup-model.dat\")\n", "model.train()" @@ -404,9 +458,29 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 9, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "model_file not supplied; using the default\n", + "model_file=mockup-model.dat\n", + "[00:34:17] WARNING: /workspace/src/objective/regression_obj.cu:152: reg:linear is now deprecated in favor of reg:squarederror.\n" + ] + }, + { + "data": { + "text/plain": [ + "[[361.5152893066406, -99.92890930175781]]" + ] + }, + "execution_count": 9, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "(train_X, train_y), (test_X, test_y) =read_synthetic_input()\n", "\n", @@ -441,7 +515,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 10, "metadata": {}, "outputs": [], "source": [ @@ -468,9 +542,31 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 25, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "Converting build-train-deploy.ipynb to build-train-deploy.py\n", + "Creating entry point for the class name ModelServe\n" + ] + }, + { + "data": { + "text/plain": [ + "[PosixPath('build-train-deploy.py'),\n", + " 'mockup-model.dat',\n", + " 'xgboost_util.py',\n", + " 'requirements.txt']" + ] + }, + "execution_count": 25, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "from kubeflow.fairing.builders import cluster\n", "preprocessor = ConvertNotebookPreprocessorWithFire(class_name='ModelServe', notebook_file='build-train-deploy.ipynb')\n", @@ -497,15 +593,258 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 26, "metadata": { "scrolled": true }, - "outputs": [], + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "Building image using cluster builder.\n", + "Creating docker context: /tmp/fairing_context_ybqvdghn\n", + "Converting build-train-deploy.ipynb to build-train-deploy.py\n", + "Creating entry point for the class name ModelServe\n", + "Waiting for fairing-builder-ksmm7-gt427 to start...\n", + "Waiting for fairing-builder-ksmm7-gt427 to start...\n", + "Waiting for fairing-builder-ksmm7-gt427 to start...\n", + "Pod started running True\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "ERROR: logging before flag.Parse: E1025 01:42:23.499654 1 metadata.go:241] Failed to unmarshal scopes: invalid character 'h' looking for beginning of value\n", + "\u001b[36mINFO\u001b[0m[0002] Downloading base image gcr.io/kubeflow-images-public/tensorflow-1.13.1-notebook-cpu:v0.5.0\n", + 
"\u001b[36mINFO\u001b[0m[0002] Downloading base image gcr.io/kubeflow-images-public/tensorflow-1.13.1-notebook-cpu:v0.5.0\n", + "\u001b[33mWARN\u001b[0m[0002] Error while retrieving image from cache: getting image from path: open /cache/sha256:5aaccf0267f085afd976342a8e943a9c6cefccef5b554df4e15fa7bf15cbd7a3: no such file or directory\n", + "\u001b[36mINFO\u001b[0m[0002] Using files from context: [/kaniko/buildcontext/app/requirements.txt]\n", + "\u001b[36mINFO\u001b[0m[0002] Checking for cached layer gcr.io/jlewi-dev/fairing-job/fairing-job/cache:864fc6b813659edb48dd37b06d234c939c364db3e60df63a7de4e13b3174f933...\n", + "\u001b[36mINFO\u001b[0m[0002] No cached layer found for cmd RUN if [ -e requirements.txt ];then pip install --no-cache -r requirements.txt; fi\n", + "\u001b[36mINFO\u001b[0m[0002] Unpacking rootfs as cmd RUN if [ -e requirements.txt ];then pip install --no-cache -r requirements.txt; fi requires it.\n", + "\u001b[36mINFO\u001b[0m[0117] Taking snapshot of full filesystem...\n", + "\u001b[36mINFO\u001b[0m[0129] Skipping paths under /dev, as it is a whitelisted directory\n", + "\u001b[36mINFO\u001b[0m[0129] Skipping paths under /etc/secrets, as it is a whitelisted directory\n", + "\u001b[36mINFO\u001b[0m[0129] Skipping paths under /kaniko, as it is a whitelisted directory\n", + "\u001b[36mINFO\u001b[0m[0130] Skipping paths under /proc, as it is a whitelisted directory\n", + "\u001b[36mINFO\u001b[0m[0130] Skipping paths under /sys, as it is a whitelisted directory\n", + "\u001b[36mINFO\u001b[0m[0131] Skipping paths under /var/run, as it is a whitelisted directory\n", + "\u001b[36mINFO\u001b[0m[0202] WORKDIR /app/\n", + "\u001b[36mINFO\u001b[0m[0202] cmd: workdir\n", + "\u001b[36mINFO\u001b[0m[0202] Changed working directory to /app/\n", + "\u001b[36mINFO\u001b[0m[0202] Creating directory /app/\n", + "\u001b[36mINFO\u001b[0m[0202] Taking snapshot of files...\n", + "\u001b[36mINFO\u001b[0m[0202] ENV FAIRING_RUNTIME 1\n", + "\u001b[36mINFO\u001b[0m[0202] No files changed in this command, skipping snapshotting.\n", + "\u001b[36mINFO\u001b[0m[0202] Using files from context: [/kaniko/buildcontext/app/requirements.txt]\n", + "\u001b[36mINFO\u001b[0m[0203] COPY /app//requirements.txt /app/\n", + "\u001b[36mINFO\u001b[0m[0203] Taking snapshot of files...\n", + "\u001b[36mINFO\u001b[0m[0203] RUN if [ -e requirements.txt ];then pip install --no-cache -r requirements.txt; fi\n", + "\u001b[36mINFO\u001b[0m[0203] cmd: /bin/bash\n", + "\u001b[36mINFO\u001b[0m[0203] args: [-c if [ -e requirements.txt ];then pip install --no-cache -r requirements.txt; fi]\n", + "Collecting fire (from -r requirements.txt (line 1))\n", + " Downloading https://files.pythonhosted.org/packages/d9/69/faeaae8687f4de0f5973694d02e9d6c3eb827636a009157352d98de1129e/fire-0.2.1.tar.gz (76kB)\n", + "Collecting gitpython (from -r requirements.txt (line 2))\n", + " Downloading https://files.pythonhosted.org/packages/aa/25/9fd9f0b05408021736a22ae73f837152c132e4ea85cdd71d186e24efec31/GitPython-3.0.4-py3-none-any.whl (454kB)\n", + "Requirement already satisfied: google-cloud-storage in /opt/conda/lib/python3.6/site-packages (from -r requirements.txt (line 3)) (1.14.0)\n", + "Collecting joblib (from -r requirements.txt (line 4))\n", + " Downloading https://files.pythonhosted.org/packages/8f/42/155696f85f344c066e17af287359c9786b436b1bf86029bb3411283274f3/joblib-0.14.0-py2.py3-none-any.whl (294kB)\n", + "Collecting kubeflow-metadata (from -r requirements.txt (line 5))\n", + " Downloading 
https://files.pythonhosted.org/packages/43/b4/3fa3c1a88b8c52695b33acd09189dda8c84ea582acbfd07a1d46f085828c/kubeflow_metadata-0.2.0-py3-none-any.whl (69kB)\n", + "Requirement already satisfied: numpy in /opt/conda/lib/python3.6/site-packages (from -r requirements.txt (line 6)) (1.16.2)\n", + "Collecting pandas (from -r requirements.txt (line 7))\n", + " Downloading https://files.pythonhosted.org/packages/86/12/08b092f6fc9e4c2552e37add0861d0e0e0d743f78f1318973caad970b3fc/pandas-0.25.2-cp36-cp36m-manylinux1_x86_64.whl (10.4MB)\n", + "Collecting retrying (from -r requirements.txt (line 8))\n", + " Downloading https://files.pythonhosted.org/packages/44/ef/beae4b4ef80902f22e3af073397f079c96969c69b2c7d52a57ea9ae61c9d/retrying-1.3.3.tar.gz\n", + "Collecting seldon-core (from -r requirements.txt (line 9))\n", + " Downloading https://files.pythonhosted.org/packages/62/25/442db772bc1950864756de2b7cb9f23b0ae0d0997189f3e3eb56e84ea22f/seldon_core-0.4.1-py3-none-any.whl (45kB)\n", + "Collecting sklearn (from -r requirements.txt (line 10))\n", + " Downloading https://files.pythonhosted.org/packages/1e/7a/dbb3be0ce9bd5c8b7e3d87328e79063f8b263b2b1bfa4774cb1147bfcd3f/sklearn-0.0.tar.gz\n", + "Requirement already satisfied: xgboost in /opt/conda/lib/python3.6/site-packages (from -r requirements.txt (line 11)) (0.82)\n", + "Collecting tornado>=6.0.3 (from -r requirements.txt (line 12))\n", + " Downloading https://files.pythonhosted.org/packages/30/78/2d2823598496127b21423baffaa186b668f73cd91887fcef78b6eade136b/tornado-6.0.3.tar.gz (482kB)\n", + "Requirement already satisfied: six in /opt/conda/lib/python3.6/site-packages (from fire->-r requirements.txt (line 1)) (1.12.0)\n", + "Requirement already satisfied: termcolor in /opt/conda/lib/python3.6/site-packages (from fire->-r requirements.txt (line 1)) (1.1.0)\n", + "Collecting gitdb2>=2.0.0 (from gitpython->-r requirements.txt (line 2))\n", + " Downloading https://files.pythonhosted.org/packages/03/6c/99296f89bad2ef85626e1df9f677acbee8885bb043ad82ad3ed4746d2325/gitdb2-2.0.6-py2.py3-none-any.whl (63kB)\n", + "Requirement already satisfied: google-resumable-media>=0.3.1 in /opt/conda/lib/python3.6/site-packages (from google-cloud-storage->-r requirements.txt (line 3)) (0.3.2)\n", + "Requirement already satisfied: google-cloud-core<0.30dev,>=0.29.0 in /opt/conda/lib/python3.6/site-packages (from google-cloud-storage->-r requirements.txt (line 3)) (0.29.1)\n", + "Requirement already satisfied: google-api-core<2.0.0dev,>=1.6.0 in /opt/conda/lib/python3.6/site-packages (from google-cloud-storage->-r requirements.txt (line 3)) (1.9.0)\n", + "Requirement already satisfied: pytz>=2017.2 in /opt/conda/lib/python3.6/site-packages (from pandas->-r requirements.txt (line 7)) (2018.9)\n", + "Requirement already satisfied: python-dateutil>=2.6.1 in /opt/conda/lib/python3.6/site-packages (from pandas->-r requirements.txt (line 7)) (2.8.0)\n", + "Requirement already satisfied: grpcio in /opt/conda/lib/python3.6/site-packages (from seldon-core->-r requirements.txt (line 9)) (1.19.0)\n", + "Collecting Flask-OpenTracing==0.2.0 (from seldon-core->-r requirements.txt (line 9))\n", + " Downloading https://files.pythonhosted.org/packages/1d/c4/0546b854a3f42af9ef959df9bd1108903698e175e7a07c057cdfaeeef718/Flask_OpenTracing-0.2.0-py2.py3-none-any.whl\n", + "Collecting flatbuffers (from seldon-core->-r requirements.txt (line 9))\n", + " Downloading 
https://files.pythonhosted.org/packages/c9/84/adf5837f96c39990bc55afdfddf460b38b4562f50341359afa32e4a98de7/flatbuffers-1.11-py2.py3-none-any.whl\n", + "Collecting minio>=4.0.9 (from seldon-core->-r requirements.txt (line 9))\n", + " Downloading https://files.pythonhosted.org/packages/ba/17/6084f63de9bd7c6d47b5aab719d6246c01d74d4aaad373e0142a666080cc/minio-5.0.1-py2.py3-none-any.whl (62kB)\n", + "Requirement already satisfied: requests in /opt/conda/lib/python3.6/site-packages (from seldon-core->-r requirements.txt (line 9)) (2.21.0)\n", + "Collecting flask-cors (from seldon-core->-r requirements.txt (line 9))\n", + " Downloading https://files.pythonhosted.org/packages/78/38/e68b11daa5d613e3a91e4bf3da76c94ac9ee0d9cd515af9c1ab80d36f709/Flask_Cors-3.0.8-py2.py3-none-any.whl\n", + "Requirement already satisfied: pyyaml in /opt/conda/lib/python3.6/site-packages (from seldon-core->-r requirements.txt (line 9)) (5.1)\n", + "Requirement already satisfied: protobuf in /opt/conda/lib/python3.6/site-packages (from seldon-core->-r requirements.txt (line 9)) (3.7.1)\n", + "Collecting opentracing<2,>=1.2.2 (from seldon-core->-r requirements.txt (line 9))\n", + " Downloading https://files.pythonhosted.org/packages/06/c2/90b35a1abdc639a5c6000d8202c70217d60e80f5b328870efb73fda71115/opentracing-1.3.0.tar.gz\n", + "Collecting flask (from seldon-core->-r requirements.txt (line 9))\n", + " Downloading https://files.pythonhosted.org/packages/9b/93/628509b8d5dc749656a9641f4caf13540e2cdec85276964ff8f43bbb1d3b/Flask-1.1.1-py2.py3-none-any.whl (94kB)\n", + "Collecting grpcio-opentracing (from seldon-core->-r requirements.txt (line 9))\n", + " Downloading https://files.pythonhosted.org/packages/db/82/2fcad380697c3dab25de76ee590bcab3eb9bbfb4add916044d7e83ec2b10/grpcio_opentracing-1.1.4-py3-none-any.whl\n", + "Requirement already satisfied: tensorflow in /opt/conda/lib/python3.6/site-packages (from seldon-core->-r requirements.txt (line 9)) (1.13.1)\n", + "Collecting gunicorn>=19.9.0 (from seldon-core->-r requirements.txt (line 9))\n", + " Downloading https://files.pythonhosted.org/packages/8c/da/b8dd8deb741bff556db53902d4706774c8e1e67265f69528c14c003644e6/gunicorn-19.9.0-py2.py3-none-any.whl (112kB)\n", + "Collecting jaeger-client==3.13.0 (from seldon-core->-r requirements.txt (line 9))\n", + " Downloading https://files.pythonhosted.org/packages/c8/a2/e9bd04cd660cbdffe0598173be068be23099fbd68e7a4a89b74440509130/jaeger-client-3.13.0.tar.gz (77kB)\n", + "Collecting azure-storage-blob>=2.0.1 (from seldon-core->-r requirements.txt (line 9))\n", + " Downloading https://files.pythonhosted.org/packages/3e/84/610f379b46d7d3c2d48eadeed6a12b6d46a43100fea70534f5992d0ac996/azure_storage_blob-2.1.0-py2.py3-none-any.whl (88kB)\n", + "Collecting redis (from seldon-core->-r requirements.txt (line 9))\n", + " Downloading https://files.pythonhosted.org/packages/32/ae/28613a62eea0d53d3db3147f8715f90da07667e99baeedf1010eb400f8c0/redis-3.3.11-py2.py3-none-any.whl (66kB)\n", + "Collecting scikit-learn (from sklearn->-r requirements.txt (line 10))\n", + " Downloading https://files.pythonhosted.org/packages/a0/c5/d2238762d780dde84a20b8c761f563fe882b88c5a5fb03c056547c442a19/scikit_learn-0.21.3-cp36-cp36m-manylinux1_x86_64.whl (6.7MB)\n", + "Requirement already satisfied: scipy in /opt/conda/lib/python3.6/site-packages (from xgboost->-r requirements.txt (line 11)) (1.2.1)\n", + "Collecting smmap2>=2.0.0 (from gitdb2>=2.0.0->gitpython->-r requirements.txt (line 2))\n", + " Downloading 
https://files.pythonhosted.org/packages/55/d2/866d45e3a121ee15a1dc013824d58072fd5c7799c9c34d01378eb262ca8f/smmap2-2.0.5-py2.py3-none-any.whl\n", + "Requirement already satisfied: googleapis-common-protos!=1.5.4,<2.0dev,>=1.5.3 in /opt/conda/lib/python3.6/site-packages (from google-api-core<2.0.0dev,>=1.6.0->google-cloud-storage->-r requirements.txt (line 3)) (1.5.9)\n", + "Requirement already satisfied: google-auth<2.0dev,>=0.4.0 in /opt/conda/lib/python3.6/site-packages (from google-api-core<2.0.0dev,>=1.6.0->google-cloud-storage->-r requirements.txt (line 3)) (1.6.3)\n", + "Requirement already satisfied: setuptools>=34.0.0 in /opt/conda/lib/python3.6/site-packages (from google-api-core<2.0.0dev,>=1.6.0->google-cloud-storage->-r requirements.txt (line 3)) (40.9.0)\n", + "Requirement already satisfied: future in /opt/conda/lib/python3.6/site-packages (from minio>=4.0.9->seldon-core->-r requirements.txt (line 9)) (0.17.1)\n", + "Requirement already satisfied: urllib3 in /opt/conda/lib/python3.6/site-packages (from minio>=4.0.9->seldon-core->-r requirements.txt (line 9)) (1.24.1)\n", + "Requirement already satisfied: certifi in /opt/conda/lib/python3.6/site-packages (from minio>=4.0.9->seldon-core->-r requirements.txt (line 9)) (2019.3.9)\n", + "Requirement already satisfied: idna<2.9,>=2.5 in /opt/conda/lib/python3.6/site-packages (from requests->seldon-core->-r requirements.txt (line 9)) (2.8)\n", + "Requirement already satisfied: chardet<3.1.0,>=3.0.2 in /opt/conda/lib/python3.6/site-packages (from requests->seldon-core->-r requirements.txt (line 9)) (3.0.4)\n", + "Collecting Jinja2>=2.10.1 (from flask->seldon-core->-r requirements.txt (line 9))\n", + " Downloading https://files.pythonhosted.org/packages/65/e0/eb35e762802015cab1ccee04e8a277b03f1d8e53da3ec3106882ec42558b/Jinja2-2.10.3-py2.py3-none-any.whl (125kB)\n", + "Requirement already satisfied: Werkzeug>=0.15 in /opt/conda/lib/python3.6/site-packages (from flask->seldon-core->-r requirements.txt (line 9)) (0.15.2)\n", + "Collecting itsdangerous>=0.24 (from flask->seldon-core->-r requirements.txt (line 9))\n", + " Downloading https://files.pythonhosted.org/packages/76/ae/44b03b253d6fade317f32c24d100b3b35c2239807046a4c953c7b89fa49e/itsdangerous-1.1.0-py2.py3-none-any.whl\n", + "Collecting click>=5.1 (from flask->seldon-core->-r requirements.txt (line 9))\n", + " Downloading https://files.pythonhosted.org/packages/fa/37/45185cb5abbc30d7257104c434fe0b07e5a195a6847506c074527aa599ec/Click-7.0-py2.py3-none-any.whl (81kB)\n", + "Requirement already satisfied: tensorboard<1.14.0,>=1.13.0 in /opt/conda/lib/python3.6/site-packages (from tensorflow->seldon-core->-r requirements.txt (line 9)) (1.13.1)\n", + "Requirement already satisfied: keras-applications>=1.0.6 in /opt/conda/lib/python3.6/site-packages (from tensorflow->seldon-core->-r requirements.txt (line 9)) (1.0.7)\n", + "Requirement already satisfied: astor>=0.6.0 in /opt/conda/lib/python3.6/site-packages (from tensorflow->seldon-core->-r requirements.txt (line 9)) (0.7.1)\n", + "Requirement already satisfied: tensorflow-estimator<1.14.0rc0,>=1.13.0 in /opt/conda/lib/python3.6/site-packages (from tensorflow->seldon-core->-r requirements.txt (line 9)) (1.13.0)\n", + "Requirement already satisfied: gast>=0.2.0 in /opt/conda/lib/python3.6/site-packages (from tensorflow->seldon-core->-r requirements.txt (line 9)) (0.2.2)\n", + "Requirement already satisfied: keras-preprocessing>=1.0.5 in /opt/conda/lib/python3.6/site-packages (from tensorflow->seldon-core->-r requirements.txt (line 9)) 
(1.0.9)\n", + "Requirement already satisfied: wheel>=0.26 in /opt/conda/lib/python3.6/site-packages (from tensorflow->seldon-core->-r requirements.txt (line 9)) (0.33.1)\n", + "Requirement already satisfied: absl-py>=0.1.6 in /opt/conda/lib/python3.6/site-packages (from tensorflow->seldon-core->-r requirements.txt (line 9)) (0.7.1)\n", + "Collecting threadloop<2,>=1 (from jaeger-client==3.13.0->seldon-core->-r requirements.txt (line 9))\n", + " Downloading https://files.pythonhosted.org/packages/d3/1d/8398c1645b97dc008d3c658e04beda01ede3d90943d40c8d56863cf891bd/threadloop-1.0.2.tar.gz\n", + "Collecting thrift (from jaeger-client==3.13.0->seldon-core->-r requirements.txt (line 9))\n", + " Downloading https://files.pythonhosted.org/packages/c6/b4/510617906f8e0c5660e7d96fbc5585113f83ad547a3989b80297ac72a74c/thrift-0.11.0.tar.gz (52kB)\n", + "Collecting azure-common>=1.1.5 (from azure-storage-blob>=2.0.1->seldon-core->-r requirements.txt (line 9))\n", + " Downloading https://files.pythonhosted.org/packages/00/55/a703923c12cd3172d5c007beda0c1a34342a17a6a72779f8a7c269af0cd6/azure_common-1.1.23-py2.py3-none-any.whl\n", + "Collecting azure-storage-common~=2.1 (from azure-storage-blob>=2.0.1->seldon-core->-r requirements.txt (line 9))\n", + " Downloading https://files.pythonhosted.org/packages/6b/a0/6794b318ce0118d1a4053bdf0149a60807407db9b710354f2b203c2f5975/azure_storage_common-2.1.0-py2.py3-none-any.whl (47kB)\n", + "Requirement already satisfied: cachetools>=2.0.0 in /opt/conda/lib/python3.6/site-packages (from google-auth<2.0dev,>=0.4.0->google-api-core<2.0.0dev,>=1.6.0->google-cloud-storage->-r requirements.txt (line 3)) (3.1.0)\n", + "Requirement already satisfied: pyasn1-modules>=0.2.1 in /opt/conda/lib/python3.6/site-packages (from google-auth<2.0dev,>=0.4.0->google-api-core<2.0.0dev,>=1.6.0->google-cloud-storage->-r requirements.txt (line 3)) (0.2.4)\n", + "Requirement already satisfied: rsa>=3.1.4 in /opt/conda/lib/python3.6/site-packages (from google-auth<2.0dev,>=0.4.0->google-api-core<2.0.0dev,>=1.6.0->google-cloud-storage->-r requirements.txt (line 3)) (4.0)\n", + "Requirement already satisfied: MarkupSafe>=0.23 in /opt/conda/lib/python3.6/site-packages (from Jinja2>=2.10.1->flask->seldon-core->-r requirements.txt (line 9)) (1.1.1)\n", + "Requirement already satisfied: markdown>=2.6.8 in /opt/conda/lib/python3.6/site-packages (from tensorboard<1.14.0,>=1.13.0->tensorflow->seldon-core->-r requirements.txt (line 9)) (3.1)\n", + "Requirement already satisfied: h5py in /opt/conda/lib/python3.6/site-packages (from keras-applications>=1.0.6->tensorflow->seldon-core->-r requirements.txt (line 9)) (2.9.0)\n", + "Requirement already satisfied: mock>=2.0.0 in /opt/conda/lib/python3.6/site-packages (from tensorflow-estimator<1.14.0rc0,>=1.13.0->tensorflow->seldon-core->-r requirements.txt (line 9)) (2.0.0)\n", + "Requirement already satisfied: cryptography in /opt/conda/lib/python3.6/site-packages (from azure-storage-common~=2.1->azure-storage-blob>=2.0.1->seldon-core->-r requirements.txt (line 9)) (2.6.1)\n", + "Requirement already satisfied: pyasn1<0.5.0,>=0.4.1 in /opt/conda/lib/python3.6/site-packages (from pyasn1-modules>=0.2.1->google-auth<2.0dev,>=0.4.0->google-api-core<2.0.0dev,>=1.6.0->google-cloud-storage->-r requirements.txt (line 3)) (0.4.5)\n", + "Requirement already satisfied: pbr>=0.11 in /opt/conda/lib/python3.6/site-packages (from mock>=2.0.0->tensorflow-estimator<1.14.0rc0,>=1.13.0->tensorflow->seldon-core->-r requirements.txt (line 9)) (5.1.3)\n", + "Requirement already 
satisfied: cffi!=1.11.3,>=1.8 in /opt/conda/lib/python3.6/site-packages (from cryptography->azure-storage-common~=2.1->azure-storage-blob>=2.0.1->seldon-core->-r requirements.txt (line 9)) (1.12.2)\n", + "Requirement already satisfied: asn1crypto>=0.21.0 in /opt/conda/lib/python3.6/site-packages (from cryptography->azure-storage-common~=2.1->azure-storage-blob>=2.0.1->seldon-core->-r requirements.txt (line 9)) (0.24.0)\n", + "Requirement already satisfied: pycparser in /opt/conda/lib/python3.6/site-packages (from cffi!=1.11.3,>=1.8->cryptography->azure-storage-common~=2.1->azure-storage-blob>=2.0.1->seldon-core->-r requirements.txt (line 9)) (2.19)\n", + "fairing 0.5 has requirement tornado<6.0.0,>=5.1.1, but you'll have tornado 6.0.3 which is incompatible.\n", + "jaeger-client 3.13.0 has requirement tornado<5,>=4.3, but you'll have tornado 6.0.3 which is incompatible.\n", + "seldon-core 0.4.1 has requirement google-cloud-storage>=1.16.0, but you'll have google-cloud-storage 1.14.0 which is incompatible.\n", + "Installing collected packages: fire, smmap2, gitdb2, gitpython, joblib, kubeflow-metadata, pandas, retrying, opentracing, Jinja2, itsdangerous, click, flask, Flask-OpenTracing, flatbuffers, minio, flask-cors, grpcio-opentracing, gunicorn, tornado, threadloop, thrift, jaeger-client, azure-common, azure-storage-common, azure-storage-blob, redis, seldon-core, scikit-learn, sklearn\n", + " Running setup.py install for fire: started\n", + " Running setup.py install for fire: finished with status 'done'\n", + " Running setup.py install for retrying: started\n", + " Running setup.py install for retrying: finished with status 'done'\n", + " Running setup.py install for opentracing: started\n", + " Running setup.py install for opentracing: finished with status 'done'\n", + " Found existing installation: Jinja2 2.10\n", + " Uninstalling Jinja2-2.10:\n", + " Successfully uninstalled Jinja2-2.10\n", + " Found existing installation: tornado 5.1.1\n", + " Uninstalling tornado-5.1.1:\n", + " Successfully uninstalled tornado-5.1.1\n", + " Running setup.py install for tornado: started\n", + " Running setup.py install for tornado: finished with status 'done'\n", + " Running setup.py install for threadloop: started\n", + " Running setup.py install for threadloop: finished with status 'done'\n", + " Running setup.py install for thrift: started\n", + " Running setup.py install for thrift: finished with status 'done'\n", + " Running setup.py install for jaeger-client: started\n", + " Running setup.py install for jaeger-client: finished with status 'done'\n", + " Running setup.py install for sklearn: started\n", + " Running setup.py install for sklearn: finished with status 'done'\n", + "Successfully installed Flask-OpenTracing-0.2.0 Jinja2-2.10.3 azure-common-1.1.23 azure-storage-blob-2.1.0 azure-storage-common-2.1.0 click-7.0 fire-0.2.1 flask-1.1.1 flask-cors-3.0.8 flatbuffers-1.11 gitdb2-2.0.6 gitpython-3.0.4 grpcio-opentracing-1.1.4 gunicorn-19.9.0 itsdangerous-1.1.0 jaeger-client-3.13.0 joblib-0.14.0 kubeflow-metadata-0.2.0 minio-5.0.1 opentracing-1.3.0 pandas-0.25.2 redis-3.3.11 retrying-1.3.3 scikit-learn-0.21.3 seldon-core-0.4.1 sklearn-0.0 smmap2-2.0.5 threadloop-1.0.2 thrift-0.11.0 tornado-6.0.3\n", + "You are using pip version 19.0.1, however version 19.3.1 is available.\n", + "You should consider upgrading via the 'pip install --upgrade pip' command.\n", + "\u001b[36mINFO\u001b[0m[0240] Taking snapshot of full filesystem...\n", + "\u001b[36mINFO\u001b[0m[0241] Skipping paths under /dev, as it 
is a whitelisted directory\n", + "\u001b[36mINFO\u001b[0m[0241] Skipping paths under /etc/secrets, as it is a whitelisted directory\n", + "\u001b[36mINFO\u001b[0m[0241] Skipping paths under /kaniko, as it is a whitelisted directory\n", + "\u001b[36mINFO\u001b[0m[0242] Skipping paths under /proc, as it is a whitelisted directory\n", + "\u001b[36mINFO\u001b[0m[0242] Skipping paths under /sys, as it is a whitelisted directory\n", + "\u001b[36mINFO\u001b[0m[0243] Skipping paths under /var/run, as it is a whitelisted directory\n", + "\u001b[36mINFO\u001b[0m[0243] Adding whiteout for /opt/conda/lib/python3.6/site-packages/tornado/platform/__pycache__/kqueue.cpython-36.pyc\n", + "\u001b[36mINFO\u001b[0m[0243] Adding whiteout for /opt/conda/lib/python3.6/site-packages/tornado-5.1.1-py3.6.egg-info\n", + "\u001b[36mINFO\u001b[0m[0243] Adding whiteout for /opt/conda/lib/python3.6/site-packages/tornado/stack_context.py\n", + "\u001b[36mINFO\u001b[0m[0243] Adding whiteout for /opt/conda/lib/python3.6/site-packages/Jinja2-2.10.dist-info\n", + "\u001b[36mINFO\u001b[0m[0243] Adding whiteout for /opt/conda/lib/python3.6/site-packages/tornado/test/stack_context_test.py\n", + "\u001b[36mINFO\u001b[0m[0243] Adding whiteout for /opt/conda/lib/python3.6/site-packages/tornado/platform/kqueue.py\n", + "\u001b[36mINFO\u001b[0m[0243] Adding whiteout for /opt/conda/lib/python3.6/site-packages/tornado/platform/epoll.py\n", + "\u001b[36mINFO\u001b[0m[0243] Adding whiteout for /opt/conda/lib/python3.6/site-packages/tornado/platform/__pycache__/select.cpython-36.pyc\n", + "\u001b[36mINFO\u001b[0m[0243] Adding whiteout for /opt/conda/lib/python3.6/site-packages/tornado/platform/common.py\n", + "\u001b[36mINFO\u001b[0m[0243] Adding whiteout for /opt/conda/lib/python3.6/site-packages/tornado/platform/__pycache__/common.cpython-36.pyc\n", + "\u001b[36mINFO\u001b[0m[0243] Adding whiteout for /opt/conda/lib/python3.6/site-packages/tornado/__pycache__/stack_context.cpython-36.pyc\n", + "\u001b[36mINFO\u001b[0m[0243] Adding whiteout for /opt/conda/lib/python3.6/site-packages/tornado/test/__pycache__/stack_context_test.cpython-36.pyc\n", + "\u001b[36mINFO\u001b[0m[0243] Adding whiteout for /opt/conda/lib/python3.6/site-packages/tornado/platform/__pycache__/epoll.cpython-36.pyc\n", + "\u001b[36mINFO\u001b[0m[0243] Adding whiteout for /opt/conda/lib/python3.6/site-packages/tornado/platform/select.py\n", + "\u001b[36mINFO\u001b[0m[0277] Using files from context: [/kaniko/buildcontext/app]\n", + "\u001b[36mINFO\u001b[0m[0277] Pushing layer gcr.io/jlewi-dev/fairing-job/fairing-job/cache:864fc6b813659edb48dd37b06d234c939c364db3e60df63a7de4e13b3174f933 to cache now\n", + "\u001b[36mINFO\u001b[0m[0277] COPY /app/ /app/\n", + "\u001b[36mINFO\u001b[0m[0277] Taking snapshot of files...\n", + "2019/10/25 01:47:01 pushed blob sha256:671fd5dc4379ffdf4694c30fd98b8b6bae9213cdff0939b936debf0f22f78708\n", + "2019/10/25 01:47:05 pushed blob sha256:8da7bddc0c459ae3160be07163f4012ef7befef6ae05c198bead57633e46e770\n", + "2019/10/25 01:47:05 gcr.io/jlewi-dev/fairing-job/fairing-job/cache:864fc6b813659edb48dd37b06d234c939c364db3e60df63a7de4e13b3174f933: digest: sha256:2339a62186a93347f3bb9bc85456045d45dc9152793ccc5164210b58aab5512b size: 429\n", + "2019/10/25 01:47:05 existing blob: sha256:9ad0c8331ed7f0f76b54d8e91e66661a3ca35e02a25cc83ccb48d51fa89e5573\n", + "2019/10/25 01:47:05 existing blob: sha256:ff51e784988b3a953df5d6ba36b982436c2b16a77eb081ce7a589ca67d04144c\n", + "2019/10/25 01:47:05 existing blob: 
sha256:969fc9c5501e60432ca0bc4b635493feb2f90e14822d2f3e3f79742fed96757d\n", + "2019/10/25 01:47:05 existing blob: sha256:432f7fba907384de9a5c1c23aed93fa3eff7d6a8d89a91f5eab99f41aa889323\n", + "2019/10/25 01:47:05 existing blob: sha256:8485e620dff15e8a69076ac02f6b23ffb3408161cdc2c0572905838765a84854\n", + "2019/10/25 01:47:05 existing blob: sha256:398d32b153e84fe343f0c5b07d65e89b05551aae6cb8b3a03bb2b662976eb3b8\n", + "2019/10/25 01:47:05 existing blob: sha256:47956fc6abae87d70180bc4f0efdad014b8e2a3b617a447ac01f674336737dfc\n", + "2019/10/25 01:47:05 existing blob: sha256:8da7bddc0c459ae3160be07163f4012ef7befef6ae05c198bead57633e46e770\n", + "2019/10/25 01:47:05 existing blob: sha256:59951887a0c1d1a227f43219b3bc84562a6f2a7e0ab5c276fbd9eaba6ebec02d\n", + "2019/10/25 01:47:05 existing blob: sha256:bd5e67bf2947497b4a4347d2751797d6b3a40f0dc5d355185815ee6da1b8ae0c\n", + "2019/10/25 01:47:05 existing blob: sha256:124c757242f88002a858c23fc79f8262f9587fa30fd92507e586ad074afb42b6\n", + "2019/10/25 01:47:05 existing blob: sha256:167108358fe643eea57fc595ff9b76a1a7e09e022c84d724346ce5b41d0148bc\n", + "2019/10/25 01:47:05 existing blob: sha256:62228d5c51598033083adbf71e8ee3d8d523d7d6d8c9d789b8c8a2d71ca988ac\n", + "2019/10/25 01:47:05 existing blob: sha256:22ea01b3a354ebdcf4386e6d2f53b6cf65bd9cdcb34a70f32e00b90a477589d0\n", + "2019/10/25 01:47:05 existing blob: sha256:c451d20886c33c47dab7b01b05ece292ee5173a9a4aced925035401a6b1de62e\n", + "2019/10/25 01:47:05 existing blob: sha256:fa3f2f277e67c5cbbf1dac21dc27111a60d3cd2ef494d94aa1515d3319f2a245\n", + "2019/10/25 01:47:05 existing blob: sha256:547e89bdafacadd9655a394a9d73c49c9890233c0cd244cbc5b1cb859be1395c\n", + "2019/10/25 01:47:05 existing blob: sha256:afde35469481d2bc446d649a7a3d099147bbf7696b66333e76a411686b617ea1\n", + "2019/10/25 01:47:05 existing blob: sha256:9d866f8bde2a0d607a6d17edc0fbd5e00b58306efc2b0a57e0ba72f269e7c6be\n", + "2019/10/25 01:47:05 existing blob: sha256:86db56dbcdfc4e5ba205e00f3de178548dd0fcd3d1d9ec011747ca0bb08a8177\n", + "2019/10/25 01:47:05 existing blob: sha256:9ab35225e174496943b6a86bf62d004409479cf722ef1d3e01ca48afc8cfaa79\n", + "2019/10/25 01:47:05 existing blob: sha256:147c5bbff888fc9cddffd4078daa35bba0d1d6f6c7175a1acb144412a43b3fce\n", + "2019/10/25 01:47:07 pushed blob sha256:80d3506bc094600aada9dc076b44354b134277700f2420838db7b742c50533ed\n", + "2019/10/25 01:47:07 pushed blob sha256:2e67912c44ec0aadea8c990a4a8fc882e4655a798807840977b49b5a972eb47d\n", + "2019/10/25 01:47:07 pushed blob sha256:2f2b9c4bf759eaf2afb42e189cc50b21d4614d1892227349409d012a90355268\n", + "2019/10/25 01:47:07 pushed blob sha256:5831cf619d1fb5d7b9430a0943017516edf2d83451941d468c78479b73f65975\n", + "2019/10/25 01:47:08 gcr.io/jlewi-dev/fairing-job/fairing-job:A486B058: digest: sha256:bf1c54b7880b81f232c15f31a0af74a70550e2eedffd2c9ff289f32f4b8d85fa size: 4325\n" + ] + } + ], "source": [ "# Use a stock jupyter image as our base image\n", + "# TODO(jlewi): Should we try to use the downward API to default to the image we are running in?\n", + "# TODO(https://github.com/kubeflow/fairing/issues/404): We need to fix 404\n", + "# before we can upgrade to the 0.7.0 image as the base image.\n", + "# We will need to use that to set the Dockerfile used by ClusterBuilder\n", + "# base_image = \"gcr.io/kubeflow-images-public/tensorflow-1.14.0-notebook-cpu:v0.7.0\"\n", "base_image = \"gcr.io/kubeflow-images-public/tensorflow-1.13.1-notebook-cpu:v0.5.0\"\n", - "\n", "cluster_builder = cluster.cluster.ClusterBuilder(registry=DOCKER_REGISTRY,\n", " 
base_image=base_image,\n", " preprocessor=preprocessor,\n", @@ -538,11 +877,63 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 27, "metadata": { "scrolled": true }, - "outputs": [], + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "Converting build-train-deploy.ipynb to build-train-deploy.py\n", + "Creating entry point for the class name ModelServe\n", + "Building image using Append builder...\n", + "Creating docker context: /tmp/fairing_context_41v9y1k9\n", + "Converting build-train-deploy.ipynb to build-train-deploy.py\n", + "Creating entry point for the class name ModelServe\n", + "build-train-deploy.py already exists in Fairing context, skipping...\n", + "Loading Docker credentials for repository 'gcr.io/jlewi-dev/fairing-job/fairing-job:A486B058'\n", + "Invoking 'docker-credential-gcloud' to obtain Docker credentials.\n", + "Successfully obtained Docker credentials.\n", + "Image successfully built in 2.0983306730049662s.\n", + "Pushing image gcr.io/jlewi-dev/fairing-job/fairing-job:7935B6A7...\n", + "Loading Docker credentials for repository 'gcr.io/jlewi-dev/fairing-job/fairing-job:7935B6A7'\n", + "Invoking 'docker-credential-gcloud' to obtain Docker credentials.\n", + "Successfully obtained Docker credentials.\n", + "Uploading gcr.io/jlewi-dev/fairing-job/fairing-job:7935B6A7\n", + "Layer sha256:80d3506bc094600aada9dc076b44354b134277700f2420838db7b742c50533ed exists, skipping\n", + "Layer sha256:8da7bddc0c459ae3160be07163f4012ef7befef6ae05c198bead57633e46e770 exists, skipping\n", + "Layer sha256:59951887a0c1d1a227f43219b3bc84562a6f2a7e0ab5c276fbd9eaba6ebec02d exists, skipping\n", + "Layer sha256:9d866f8bde2a0d607a6d17edc0fbd5e00b58306efc2b0a57e0ba72f269e7c6be exists, skipping\n", + "Layer sha256:62228d5c51598033083adbf71e8ee3d8d523d7d6d8c9d789b8c8a2d71ca988ac exists, skipping\n", + "Layer sha256:9ab35225e174496943b6a86bf62d004409479cf722ef1d3e01ca48afc8cfaa79 exists, skipping\n", + "Layer sha256:bd5e67bf2947497b4a4347d2751797d6b3a40f0dc5d355185815ee6da1b8ae0c exists, skipping\n", + "Layer sha256:5831cf619d1fb5d7b9430a0943017516edf2d83451941d468c78479b73f65975 exists, skipping\n", + "Layer sha256:8485e620dff15e8a69076ac02f6b23ffb3408161cdc2c0572905838765a84854 exists, skipping\n", + "Layer sha256:124c757242f88002a858c23fc79f8262f9587fa30fd92507e586ad074afb42b6 exists, skipping\n", + "Layer sha256:2f2b9c4bf759eaf2afb42e189cc50b21d4614d1892227349409d012a90355268 exists, skipping\n", + "Layer sha256:ff51e784988b3a953df5d6ba36b982436c2b16a77eb081ce7a589ca67d04144c exists, skipping\n", + "Layer sha256:167108358fe643eea57fc595ff9b76a1a7e09e022c84d724346ce5b41d0148bc exists, skipping\n", + "Layer sha256:432f7fba907384de9a5c1c23aed93fa3eff7d6a8d89a91f5eab99f41aa889323 exists, skipping\n", + "Layer sha256:afde35469481d2bc446d649a7a3d099147bbf7696b66333e76a411686b617ea1 exists, skipping\n", + "Layer sha256:969fc9c5501e60432ca0bc4b635493feb2f90e14822d2f3e3f79742fed96757d exists, skipping\n", + "Layer sha256:22ea01b3a354ebdcf4386e6d2f53b6cf65bd9cdcb34a70f32e00b90a477589d0 exists, skipping\n", + "Layer sha256:86db56dbcdfc4e5ba205e00f3de178548dd0fcd3d1d9ec011747ca0bb08a8177 exists, skipping\n", + "Layer sha256:c451d20886c33c47dab7b01b05ece292ee5173a9a4aced925035401a6b1de62e exists, skipping\n", + "Layer sha256:398d32b153e84fe343f0c5b07d65e89b05551aae6cb8b3a03bb2b662976eb3b8 exists, skipping\n", + "Layer sha256:47956fc6abae87d70180bc4f0efdad014b8e2a3b617a447ac01f674336737dfc exists, skipping\n", + "Layer 
sha256:9ad0c8331ed7f0f76b54d8e91e66661a3ca35e02a25cc83ccb48d51fa89e5573 exists, skipping\n", + "Layer sha256:fa3f2f277e67c5cbbf1dac21dc27111a60d3cd2ef494d94aa1515d3319f2a245 exists, skipping\n", + "Layer sha256:547e89bdafacadd9655a394a9d73c49c9890233c0cd244cbc5b1cb859be1395c exists, skipping\n", + "Layer sha256:147c5bbff888fc9cddffd4078daa35bba0d1d6f6c7175a1acb144412a43b3fce exists, skipping\n", + "Layer sha256:a4bc27d300aa1fec30a6da6b44b05c58052675425cb5b92e11cc081dec5af3aa pushed.\n", + "Layer sha256:f2b9523fe427b5599019aff069e474f3a7bcd829aeb084a174cd8610df588068 pushed.\n", + "Finished upload of: gcr.io/jlewi-dev/fairing-job/fairing-job:7935B6A7\n", + "Pushed image gcr.io/jlewi-dev/fairing-job/fairing-job:7935B6A7 in 3.5974774019996403s.\n" + ] + } + ], "source": [ "preprocessor.preprocess()\n", "\n", @@ -570,9 +961,83 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 28, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "The job fairing-job-qg87g launched.\n", + "Waiting for fairing-job-qg87g-chghc to start...\n", + "Waiting for fairing-job-qg87g-chghc to start...\n", + "Waiting for fairing-job-qg87g-chghc to start...\n", + "Pod started running True\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "model_file not supplied; using the default\n", + "model_file=mockup-model.dat\n", + "[0]\tvalidation_0-rmse:154.15\n", + "Will train until validation_0-rmse hasn't improved in 40 rounds.\n", + "[1]\tvalidation_0-rmse:147.275\n", + "[2]\tvalidation_0-rmse:140.414\n", + "[3]\tvalidation_0-rmse:135.407\n", + "[4]\tvalidation_0-rmse:131.662\n", + "[5]\tvalidation_0-rmse:127.103\n", + "[6]\tvalidation_0-rmse:123.558\n", + "[7]\tvalidation_0-rmse:118.619\n", + "[8]\tvalidation_0-rmse:115.743\n", + "[9]\tvalidation_0-rmse:112.866\n", + "[10]\tvalidation_0-rmse:110.533\n", + "[11]\tvalidation_0-rmse:108.57\n", + "[12]\tvalidation_0-rmse:107.407\n", + "[13]\tvalidation_0-rmse:104.548\n", + "[14]\tvalidation_0-rmse:102.625\n", + "[15]\tvalidation_0-rmse:100.668\n", + "[16]\tvalidation_0-rmse:99.4654\n", + "[17]\tvalidation_0-rmse:98.1461\n", + "[18]\tvalidation_0-rmse:96.71\n", + "[19]\tvalidation_0-rmse:95.4135\n", + "[20]\tvalidation_0-rmse:94.4105\n", + "[21]\tvalidation_0-rmse:92.6454\n", + "[22]\tvalidation_0-rmse:91.5752\n", + "[23]\tvalidation_0-rmse:90.4496\n", + "[24]\tvalidation_0-rmse:89.9257\n", + "[25]\tvalidation_0-rmse:88.8438\n", + "[26]\tvalidation_0-rmse:87.9895\n", + "[27]\tvalidation_0-rmse:86.42\n", + "[28]\tvalidation_0-rmse:85.2992\n", + "[29]\tvalidation_0-rmse:84.6414\n", + "[30]\tvalidation_0-rmse:84.3974\n", + "[31]\tvalidation_0-rmse:83.2113\n", + "[32]\tvalidation_0-rmse:82.5043\n", + "[33]\tvalidation_0-rmse:81.3713\n", + "[34]\tvalidation_0-rmse:81.2969\n", + "[35]\tvalidation_0-rmse:79.9762\n", + "[36]\tvalidation_0-rmse:79.084\n", + "[37]\tvalidation_0-rmse:78.8726\n", + "[38]\tvalidation_0-rmse:78.2066\n", + "[39]\tvalidation_0-rmse:77.98\n", + "[40]\tvalidation_0-rmse:76.8601\n", + "[41]\tvalidation_0-rmse:76.3929\n", + "[42]\tvalidation_0-rmse:76.0857\n", + "[43]\tvalidation_0-rmse:75.4714\n", + "[44]\tvalidation_0-rmse:74.4059\n", + "[45]\tvalidation_0-rmse:73.5268\n", + "[46]\tvalidation_0-rmse:73.0309\n", + "[47]\tvalidation_0-rmse:72.4982\n", + "[48]\tvalidation_0-rmse:71.9351\n", + "[49]\tvalidation_0-rmse:71.3068\n", + "mean_absolute_error=50.72\n", + "Model export success: mockup-model.dat\n", + "Best RMSE on eval: %.2f with %d rounds 
71.306808 50\n" + ] + } + ], "source": [ "pod_spec = builder.generate_pod_spec()\n", "train_deployer = job.job.Job(cleanup=False,\n", @@ -593,9 +1058,96 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 29, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "apiVersion: v1\n", + "items:\n", + "- apiVersion: batch/v1\n", + " kind: Job\n", + " metadata:\n", + " creationTimestamp: \"2019-10-25T01:48:20Z\"\n", + " generateName: fairing-job-\n", + " labels:\n", + " fairing-deployer: job\n", + " fairing-id: 85da7b32-f6c9-11e9-8e34-46c1cdc3ff41\n", + " name: fairing-job-qg87g\n", + " namespace: kubeflow-jlewi\n", + " resourceVersion: \"625626\"\n", + " selfLink: /apis/batch/v1/namespaces/kubeflow-jlewi/jobs/fairing-job-qg87g\n", + " uid: 85df016a-f6c9-11e9-8cd6-42010a8e012b\n", + " spec:\n", + " backoffLimit: 0\n", + " completions: 1\n", + " parallelism: 1\n", + " selector:\n", + " matchLabels:\n", + " controller-uid: 85df016a-f6c9-11e9-8cd6-42010a8e012b\n", + " template:\n", + " metadata:\n", + " annotations:\n", + " sidecar.istio.io/inject: \"false\"\n", + " creationTimestamp: null\n", + " labels:\n", + " controller-uid: 85df016a-f6c9-11e9-8cd6-42010a8e012b\n", + " fairing-deployer: job\n", + " fairing-id: 85da7b32-f6c9-11e9-8e34-46c1cdc3ff41\n", + " job-name: fairing-job-qg87g\n", + " name: fairing-deployer\n", + " spec:\n", + " containers:\n", + " - command:\n", + " - python\n", + " - /app/build-train-deploy.py\n", + " - train\n", + " env:\n", + " - name: FAIRING_RUNTIME\n", + " value: \"1\"\n", + " - name: GOOGLE_APPLICATION_CREDENTIALS\n", + " value: /etc/secrets/user-gcp-sa.json\n", + " image: gcr.io/jlewi-dev/fairing-job/fairing-job:7935B6A7\n", + " imagePullPolicy: IfNotPresent\n", + " name: fairing-job\n", + " resources: {}\n", + " securityContext:\n", + " runAsUser: 0\n", + " terminationMessagePath: /dev/termination-log\n", + " terminationMessagePolicy: File\n", + " volumeMounts:\n", + " - mountPath: /etc/secrets\n", + " name: user-gcp-sa\n", + " readOnly: true\n", + " workingDir: /app/\n", + " dnsPolicy: ClusterFirst\n", + " restartPolicy: Never\n", + " schedulerName: default-scheduler\n", + " securityContext: {}\n", + " terminationGracePeriodSeconds: 30\n", + " volumes:\n", + " - name: user-gcp-sa\n", + " secret:\n", + " defaultMode: 420\n", + " secretName: user-gcp-sa\n", + " status:\n", + " completionTime: \"2019-10-25T01:48:29Z\"\n", + " conditions:\n", + " - lastProbeTime: \"2019-10-25T01:48:29Z\"\n", + " lastTransitionTime: \"2019-10-25T01:48:29Z\"\n", + " status: \"True\"\n", + " type: Complete\n", + " startTime: \"2019-10-25T01:48:20Z\"\n", + " succeeded: 1\n", + "kind: List\n", + "metadata:\n", + " resourceVersion: \"\"\n", + " selfLink: \"\"\n" + ] + } + ], "source": [ "!kubectl get jobs -l fairing-id={train_deployer.job_id} -o yaml" ] @@ -620,9 +1172,17 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 30, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "Cluster endpoint: http://fairing-service-2bhtr.kubeflow-jlewi.svc.cluster.local:5000/predict\n" + ] + } + ], "source": [ "from kubeflow.fairing.deployers import serving\n", "pod_spec = builder.generate_pod_spec()\n", @@ -644,9 +1204,100 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 31, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "apiVersion: 
extensions/v1beta1\n", + "kind: Deployment\n", + "metadata:\n", + " annotations:\n", + " deployment.kubernetes.io/revision: \"1\"\n", + " creationTimestamp: \"2019-10-25T01:48:34Z\"\n", + " generateName: fairing-deployer-\n", + " generation: 1\n", + " labels:\n", + " app: mockup\n", + " fairing-deployer: serving\n", + " fairing-id: 8e428b7a-f6c9-11e9-8e34-46c1cdc3ff41\n", + " name: fairing-deployer-cnv5x\n", + " namespace: kubeflow-jlewi\n", + " resourceVersion: \"625670\"\n", + " selfLink: /apis/extensions/v1beta1/namespaces/kubeflow-jlewi/deployments/fairing-deployer-cnv5x\n", + " uid: 8e43b5b8-f6c9-11e9-8cd6-42010a8e012b\n", + "spec:\n", + " progressDeadlineSeconds: 600\n", + " replicas: 1\n", + " revisionHistoryLimit: 10\n", + " selector:\n", + " matchLabels:\n", + " app: mockup\n", + " fairing-deployer: serving\n", + " fairing-id: 8e428b7a-f6c9-11e9-8e34-46c1cdc3ff41\n", + " strategy:\n", + " rollingUpdate:\n", + " maxSurge: 25%\n", + " maxUnavailable: 25%\n", + " type: RollingUpdate\n", + " template:\n", + " metadata:\n", + " annotations:\n", + " sidecar.istio.io/inject: \"false\"\n", + " creationTimestamp: null\n", + " labels:\n", + " app: mockup\n", + " fairing-deployer: serving\n", + " fairing-id: 8e428b7a-f6c9-11e9-8e34-46c1cdc3ff41\n", + " name: fairing-deployer\n", + " spec:\n", + " containers:\n", + " - command:\n", + " - seldon-core-microservice\n", + " - build-train-deploy.ModelServe\n", + " - REST\n", + " - --service-type=MODEL\n", + " - --persistence=0\n", + " env:\n", + " - name: FAIRING_RUNTIME\n", + " value: \"1\"\n", + " image: gcr.io/jlewi-dev/fairing-job/fairing-job:7935B6A7\n", + " imagePullPolicy: IfNotPresent\n", + " name: model\n", + " resources: {}\n", + " securityContext:\n", + " runAsUser: 0\n", + " terminationMessagePath: /dev/termination-log\n", + " terminationMessagePolicy: File\n", + " workingDir: /app/\n", + " dnsPolicy: ClusterFirst\n", + " restartPolicy: Always\n", + " schedulerName: default-scheduler\n", + " securityContext: {}\n", + " terminationGracePeriodSeconds: 30\n", + "status:\n", + " conditions:\n", + " - lastTransitionTime: \"2019-10-25T01:48:34Z\"\n", + " lastUpdateTime: \"2019-10-25T01:48:34Z\"\n", + " message: Deployment does not have minimum availability.\n", + " reason: MinimumReplicasUnavailable\n", + " status: \"False\"\n", + " type: Available\n", + " - lastTransitionTime: \"2019-10-25T01:48:34Z\"\n", + " lastUpdateTime: \"2019-10-25T01:48:35Z\"\n", + " message: ReplicaSet \"fairing-deployer-cnv5x-744dc89c56\" is progressing.\n", + " reason: ReplicaSetUpdated\n", + " status: \"True\"\n", + " type: Progressing\n", + " observedGeneration: 1\n", + " replicas: 1\n", + " unavailableReplicas: 1\n", + " updatedReplicas: 1\n" + ] + } + ], "source": [ "!kubectl get deploy -o yaml {deployer.deployment.metadata.name}" ] @@ -664,7 +1315,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 32, "metadata": {}, "outputs": [], "source": [ @@ -673,9 +1324,21 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 33, "metadata": {}, - "outputs": [], + "outputs": [ + { + "ename": "NameError", + "evalue": "name 'util' is not defined", + "output_type": "error", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mNameError\u001b[0m Traceback (most recent call last)", + "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m\u001b[0m\n\u001b[1;32m 1\u001b[0m \u001b[0mfull_url\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0murl\u001b[0m 
\u001b[0;34m+\u001b[0m \u001b[0;34m\":5000/predict\"\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 2\u001b[0;31m \u001b[0mresult\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mutil\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mpredict_nparray\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mfull_url\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mtest_X\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 3\u001b[0m \u001b[0mpprint\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mpprint\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mresult\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mcontent\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;31mNameError\u001b[0m: name 'util' is not defined" + ] + } + ], "source": [ "full_url = url + \":5000/predict\"\n", "result = util.predict_nparray(full_url, test_X)\n", @@ -869,9 +1532,9 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.6.7" + "version": "3.6.8" } }, "nbformat": 4, - "nbformat_minor": 2 + "nbformat_minor": 4 } diff --git a/xgboost_synthetic/notebook_setup.py b/xgboost_synthetic/notebook_setup.py new file mode 100644 index 00000000..6dcf9310 --- /dev/null +++ b/xgboost_synthetic/notebook_setup.py @@ -0,0 +1,45 @@ +"""Some routines to setup the notebook. + +This is separated out from util.py because this module installs some of the pip packages +that util depends on. +""" +import sys +import logging +import os +import subprocess + +from pathlib import Path + +KFP_PACKAGE = 'https://storage.googleapis.com/ml-pipeline/release/0.1.32/kfp.tar.gz' +FAIRING_PACKAGE = 'git+git://github.com/kubeflow/fairing.git@7c93e888c3fc98bdf5fb0140e90f6407ce7a807b' # pylint: disable=line-too-long + +def notebook_setup(): + # Install the SDK + logging.basicConfig(format='%(message)s') + logging.getLogger().setLevel(logging.INFO) + + logging.info("pip installing requirements.txt") + subprocess.check_call(["pip3", "install", "--user", "-r", "requirements.txt"]) + logging.info("pip installing KFP %s", KFP_PACKAGE) + subprocess.check_call(["pip3", "install", "--user", KFP_PACKAGE, "--upgrade"]) + logging.info("pip installing fairing %s", FAIRING_PACKAGE) + subprocess.check_call(["pip3", "install", "--user", FAIRING_PACKAGE]) + + logging.info("Configure docker credentials") + subprocess.check_call(["gcloud", "auth", "configure-docker", "--quiet"]) + if os.getenv("GOOGLE_APPLICATION_CREDENTIALS"): + logging.info("Activating service account") + subprocess.check_call(["gcloud", "auth", "activate-service-account", + "--key-file=" + + os.getenv("GOOGLE_APPLICATION_CREDENTIALS"), + "--quiet"]) + + home = str(Path.home()) + + # Installing the python packages locally doesn't appear to have them automatically + # added the path so we need to manually add the directory + local_py_path = os.path.join(home, ".local/lib/python3.6/site-packages") + if local_py_path not in sys.path: + logging.info("Adding %s to python path", local_py_path) + # Insert at front because we want to override any installed packages + sys.path.insert(0, local_py_path) diff --git a/xgboost_synthetic/testing/conftest.py b/xgboost_synthetic/testing/conftest.py new file mode 100644 index 00000000..6e7f0017 --- /dev/null +++ b/xgboost_synthetic/testing/conftest.py @@ -0,0 +1,39 @@ +import pytest + +def pytest_addoption(parser): + parser.addoption( + "--name", help="Name for the job. 
If not specified one will be created " + "automatically", type=str, default="") + parser.addoption( + "--namespace", help="The namespace for the application", type=str, + default="kubeflow-test-infra") + parser.addoption( + "--image", help="Notebook image to use", type=str, + default="gcr.io/kubeflow-images-public/tensorflow-1.14.0-notebook-gpu" + ":v0.7.0") + parser.addoption( + "--repos", help="The repos to check out; leave blank to use defaults", + type=str, default="") + parser.addoption( + "--cluster", help="The cluster which the application is running in", type=str, + default="") + +@pytest.fixture +def name(request): + return request.config.getoption("--name") + +@pytest.fixture +def namespace(request): + return request.config.getoption("--namespace") + +@pytest.fixture +def image(request): + return request.config.getoption("--image") + +@pytest.fixture +def repos(request): + return request.config.getoption("--repos") + +@pytest.fixture +def cluster(request): + return request.config.getoption("--cluster") diff --git a/xgboost_synthetic/testing/job.yaml b/xgboost_synthetic/testing/job.yaml index 1712b307..fd7cc81f 100644 --- a/xgboost_synthetic/testing/job.yaml +++ b/xgboost_synthetic/testing/job.yaml @@ -1,10 +1,15 @@ apiVersion: batch/v1 kind: Job metadata: - name: $(job_name) + name: xgboost-test spec: template: metadata: + annotations: + # TODO(jlewi): Do we really want to disable sidecar injection + # in the test? Would it be better to use istio to mimic what happens + # in notebooks? + sidecar.istio.io/inject: "false" labels: app: xgboost-synthetics-testing spec: @@ -17,8 +22,10 @@ spec: - /usr/local/bin/checkout_repos.sh - --repos=kubeflow/examples@$(CHECK_TAG) - --src_dir=/src + # TODO(jlewi): Do we need to do depth all here? - --depth=all name: checkout + # TODO(jlewi): Set in kustomization.yaml? 
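+      # Note: xgboost_test.py rewrites this init container's checkout_repos.sh + # arguments and sets the test container's image at runtime from the + # --repos and --image pytest options.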
image: gcr.io/kubeflow-ci/test-worker:v20190802-c6f9140-e3b0c4 volumeMounts: - mountPath: /src @@ -35,17 +42,9 @@ spec: command: ["python3", "execute_notebook.py"] workingDir: /src/kubeflow/examples/xgboost_synthetic/testing volumeMounts: - - mountPath: /var/secrets - name: user-gcp-sa - readOnly: true - mountPath: /src name: src - env: - - name: GOOGLE_APPLICATION_CREDENTIALS - value: /var/secrets/user-gcp-sa.json + serviceAccount: default-editor volumes: - - name: user-gcp-sa - secret: - secretName: user-gcp-sa - name: src emptyDir: {} diff --git a/xgboost_synthetic/testing/kustomization.yaml b/xgboost_synthetic/testing/kustomization.yaml deleted file mode 100644 index 96825e57..00000000 --- a/xgboost_synthetic/testing/kustomization.yaml +++ /dev/null @@ -1,28 +0,0 @@ -apiVersion: kustomize.config.k8s.io/v1beta1 -kind: Kustomization - -namespace: kubeflow - -generatorOptions: - disableNameSuffixHash: true - -resources: -- job.yaml -- role.yaml -- rolebinding.yaml - -configurations: -- params.yaml - -configMapGenerator: -- name: xgb-notebooks-tests - literals: - -vars: -- fieldref: - fieldPath: data.name - name: job_name - objref: - apiVersion: v1 - kind: ConfigMap - name: xgb-notebooks-tests diff --git a/xgboost_synthetic/testing/params.yaml b/xgboost_synthetic/testing/params.yaml deleted file mode 100644 index 9ddcd1c9..00000000 --- a/xgboost_synthetic/testing/params.yaml +++ /dev/null @@ -1,3 +0,0 @@ -varReference: -- path: metadata/name - kind: Job diff --git a/xgboost_synthetic/testing/rolebinding.yaml b/xgboost_synthetic/testing/rolebinding.yaml deleted file mode 100644 index b844af21..00000000 --- a/xgboost_synthetic/testing/rolebinding.yaml +++ /dev/null @@ -1,14 +0,0 @@ -apiVersion: rbac.authorization.k8s.io/v1 -kind: RoleBinding -metadata: - labels: - app: xgboost-synthetics-testing - name: xgboost-synthetics-testing-role-binding -roleRef: - apiGroup: rbac.authorization.k8s.io - kind: Role - name: xgboost-synthetics-testing-role -subjects: -- kind: Group - name: system:serviceaccounts - apiGroup: rbac.authorization.k8s.io diff --git a/xgboost_synthetic/testing/xgboost_test.py b/xgboost_synthetic/testing/xgboost_test.py index e173fa66..d96f0349 100644 --- a/xgboost_synthetic/testing/xgboost_test.py +++ b/xgboost_synthetic/testing/xgboost_test.py @@ -1,100 +1,125 @@ -import argparse +import datetime import logging import os -import subprocess import time +import uuid +import yaml +import pytest + +from kubernetes import client as k8s_client +from kubernetes.client import rest from kubeflow.testing import util -def create_job(args, app_dir): #pylint:disable=redefined-outer-name +# TODO(jlewi): This test is currently failing because various things +# need to be updated to work with 0.7.0. Until that's fixed we mark it +# as expected to fail so we can begin to get signal. 
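+# The test below renders job.yaml into a batch/v1 Job, submits it with the +# Kubernetes Python client, and polls the Job's conditions until it reaches +# Complete or Failed, or the timeout expires.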
+@pytest.mark.xfail +def test_xgboost_synthetic(record_xml_attribute, name, namespace, cluster, # pylint: disable=too-many-branches,too-many-statements + repos, image, app_dir): '''Generate Job and summit.''' - util.run(['gcloud', 'auth', 'activate-service-account', - "--key-file=/secret/gcp-credentials/key.json"], cwd=app_dir) - util.run(['gcloud', '--project=kubeflow-ci-deployment', 'container', - "clusters", "get-credentials", "--zone=us-east1-b", args.cluster], cwd=app_dir) - - configmap = 'xgb-notebooks-tests' - util.run(['kustomize', 'edit', 'add', 'configmap', configmap, - '--from-literal=name=' + args.name], cwd=app_dir) - # For presubmit, set the checkout tag as HEAD:$(PULL_NUMBER), others set to PULL_BASE_SHA - if args.jobType == 'presubmit': - util.run(['kustomize', 'edit', 'add', 'configmap', configmap, - '--from-literal=checkTag=HEAD:' + args.pullNumber], cwd=app_dir) - else: - util.run(['kustomize', 'edit', 'add', 'configmap', configmap, - '--from-literal=checkTag=' + args.pullBaseSHA], cwd=app_dir) - util.run(['kustomize', 'edit', 'set', 'namespace', args.namespace], cwd=app_dir) - util.run(['kustomize', 'edit', 'set', 'image', 'execute-image=' + args.image], cwd=app_dir) - util.run(['kustomize', 'build', app_dir, '-o', 'generated.yaml'], cwd=app_dir) - util.run(['kubectl', 'apply', '-f', 'generated.yaml'], cwd=app_dir) - logging.info("Created job %s in namespaces %s", args.name, args.namespace) - -def get_pod_logs(name, namespace, app_dir): #pylint:disable=redefined-outer-name - '''Cannot get logs by k8s python api, using kubectl command to get logs.''' - logging.info("Getting pod %s logs...", name) - util.run(['kubectl', 'logs', name, '-n', namespace], cwd=app_dir) - -def check_job_status(namespace, app_dir): #pylint:disable=redefined-outer-name - '''Cannot get job by k8s python api, using kubectl command to check job status.''' - is_successed = False - pod_info, pod_name, pod_status = '', '', '' - for _ in range(0, 30): - time.sleep(60) - subCmd = "kubectl get pod -n " + namespace + " | grep -m1 xgboost-test" - pod_info = subprocess.run(subCmd, - stdout=subprocess.PIPE, - stderr=subprocess.PIPE, - shell=True, - universal_newlines=True) - if len(str(pod_info.stdout).split()) >= 2: - pod_name = str(pod_info.stdout).split()[0] - pod_status = str(pod_info.stdout).split()[2] - - if pod_name: - if pod_status == "Pending": - logging.info("Pod %s is Pending.", pod_name) - elif pod_status == "Running": - logging.info("Pod %s is Running.", pod_name) - elif pod_status == "Completed": - logging.info("Pod %s is Completed.", pod_name) - get_pod_logs(pod_name, namespace, app_dir) - is_successed = True - break - elif pod_status == "Error": - get_pod_logs(pod_name, namespace, app_dir) - raise RuntimeError("Failed to execute notebook.") - else: - logging.warning("Pod %s status %s.", pod_name, pod_status) - else: - logging.warning("Cannot get the pod name, retry after 60 seconds.") - - if not is_successed: - raise RuntimeError("Timeout to get the executing notebook pod after 30 munites.") - - -if __name__ == "__main__": - - logging.basicConfig(level=logging.INFO) - - parser = argparse.ArgumentParser() - parser.add_argument( - "--name", help="deploy application name", type=str, required=True) - parser.add_argument( - "--namespace", help="The namespace for the application", type=str, required=True) - parser.add_argument( - "--image", help="Image name for the application", type=str, required=True) - parser.add_argument( - "--pullNumber", help="The PR number", type=str, required=True) - 
parser.add_argument( - "--pullBaseSHA", help="The pull base SHA", type=str, required=True) - parser.add_argument( - "--jobType", help="The job type such as presubmit or postsubmit", type=str, required=True) - parser.add_argument( - "--cluster", help="The cluster which the applition running in", type=str, required=True) - - app_dir = os.path.dirname(__file__) + util.set_pytest_junit(record_xml_attribute, "test_xgboost_synthetic") app_dir = os.path.abspath(app_dir) - args = parser.parse_args() - create_job(args, app_dir) - check_job_status(args.namespace, app_dir) + if os.getenv("GOOGLE_APPLICATION_CREDENTIALS"): + util.run(['gcloud', 'auth', 'activate-service-account', + "--key-file=" + os.getenv("GOOGLE_APPLICATION_CREDENTIALS")], + cwd=app_dir) + + # TODO(jlewi): We should just assume that kubeconfig has been set. + if cluster: + util.run(['gcloud', '--project=kubeflow-ci-deployment', 'container', + "clusters", "get-credentials", "--zone=us-east1-b", cluster], + cwd=app_dir) + + with open("job.yaml") as hf: + job = yaml.load(hf) + + # We need to check out the correct version of the code + # in presubmits and postsubmits. We check the prow environment variables + # to get the appropriate values. + # We should probably only do that if repos is not explicitly provided. + # See + # https://github.com/kubernetes/test-infra/blob/45246b09ed105698aa8fb928b7736d14480def29/prow/jobs.md#job-environment-variables + if not repos: + version = "@HEAD" + if os.getenv("PULL_NUMBER"): + version = "@{0}:{1}".format(os.getenv("PULL_PULL_SHA"), + os.getenv("PULL_NUMBER")) + + else: + if os.getenv("PULL_BASE_SHA"): + version = "@{0}".format(os.getenv("PULL_BASE_SHA")) + + repos = "kubeflow/examples" + version + + logging.info("Repos set to %s", repos) + job["spec"]["template"]["spec"]["initContainers"][0]["command"] = [ + "/usr/local/bin/checkout_repos.sh", + "--repos=" + repos, + "--src_dir=/src", + "--depth=all", + ] + job["spec"]["template"]["spec"]["containers"][0]["image"] = image + util.load_kube_config(persist_config=False) + + if name: + job["metadata"]["name"] = name + else: + job["metadata"]["name"] = ("xgboost-test-" + + datetime.datetime.now().strftime("%H%M%S") + + "-" + uuid.uuid4().hex[0:3]) + + job["metadata"]["namespace"] = namespace + + # Create an API client object to talk to the K8s master. + api_client = k8s_client.ApiClient() + batch_api = k8s_client.BatchV1Api(api_client) + + logging.info("Creating job:\n%s", yaml.dump(job)) + batch_api.create_namespaced_job(job["metadata"]["namespace"], job) + logging.info("Created job %s in namespace %s", name, namespace) + + # Wait for job. 
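+  # Poll the Job through the batch API until it reports a terminal condition +  # ("Complete" or "Failed") or the 15 minute deadline below expires.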
+ end_time = datetime.datetime.now() + datetime.timedelta( + minutes=15) + + last_condition = None + while datetime.datetime.now() < end_time: + try: + job = batch_api.read_namespaced_job(name, namespace) + except rest.ApiException as e: + logging.error("There was a problem getting job %s.%s; %s", + namespace, name, e) + time.sleep(10) + continue + # conditions could be None until the job controller updates the status + if not job.conditions: + logging.info("Job missing condition") + time.sleep(10) + continue + + last_condition = job.conditions[-1] + if last_condition["type"] in ["Failed", "Complete"]: + break + logging.info("Waiting for job %s.%s", namespace, name) + time.sleep(10) + + logging.info("Final Job spec:\n%s", yaml.safe_dump(job)) + util.run(["kubectl", "describe", "job", "-n", namespace, name]) + + if not last_condition or last_condition["type"] not in ["Failed", "Complete"]: + logging.error("Timeout waiting for job %s.%s to finish.", namespace, name) + raise RuntimeError("Job {0}.{1} has last condition {2} which is not " + "Complete".format(namespace, name, + last_condition["type"] if last_condition else None)) + assert last_condition["type"] == "Complete" + +if __name__ == "__main__": + logging.basicConfig(level=logging.INFO, + format=('%(levelname)s|%(asctime)s' + '|%(pathname)s|%(lineno)d| %(message)s'), + datefmt='%Y-%m-%dT%H:%M:%S', + ) + logging.getLogger().setLevel(logging.INFO) + pytest.main() diff --git a/xgboost_synthetic/util.py b/xgboost_synthetic/util.py index 41cb1b55..ae818f43 100644 --- a/xgboost_synthetic/util.py +++ b/xgboost_synthetic/util.py @@ -1,27 +1,11 @@ -import logging import os -import shutil -import subprocess import json +import shutil import requests + from retrying import retry import numpy as np -KFP_PACKAGE = 'https://storage.googleapis.com/ml-pipeline/release/0.1.20/kfp.tar.gz' -def notebook_setup(): - # Install the SDK - - subprocess.check_call(["pip3", "install", "-r", "requirements.txt"]) - subprocess.check_call(["pip3", "install", KFP_PACKAGE, "--upgrade"]) - - logging.basicConfig(format='%(message)s') - logging.getLogger().setLevel(logging.INFO) - - subprocess.check_call(["gcloud", "auth", "configure-docker", "--quiet"]) - subprocess.check_call(["gcloud", "auth", "activate-service-account", - "--key-file=" + os.getenv("GOOGLE_APPLICATION_CREDENTIALS"), - "--quiet"]) - def copy_data_to_nfs(nfs_path, model_dir): if not os.path.exists(nfs_path): shutil.copytree("ames_dataset", nfs_path)