diff --git a/xgboost_synthetic/.gitignore b/xgboost_synthetic/.gitignore index 868bfd22..738524f3 100644 --- a/xgboost_synthetic/.gitignore +++ b/xgboost_synthetic/.gitignore @@ -4,4 +4,6 @@ **/__pycache__ *.zip mlpipeline-metrics.json -mlpipeline-ui-metadata.json \ No newline at end of file +mlpipeline-ui-metadata.json +build-train-deploy.py +**/*.dat \ No newline at end of file diff --git a/xgboost_synthetic/Dockerfile b/xgboost_synthetic/Dockerfile index 51b6872a..06a0ca7e 100644 --- a/xgboost_synthetic/Dockerfile +++ b/xgboost_synthetic/Dockerfile @@ -3,21 +3,7 @@ # This docker image is based on existing notebook image # It also includes the dependencies required for training and deploying # this way we can use it as the base image -FROM gcr.io/kubeflow-images-public/tensorflow-1.12.0-notebook-cpu:v0.5.0 - -USER root +FROM gcr.io/kubeflow-images-public/tensorflow-1.13.1-notebook-cpu:v0.5.0 COPY requirements.txt . RUN pip3 --no-cache-dir install -r requirements.txt - -RUN apt-get update -y RUN apt-get install -y emacs - -RUN pip3 install https://storage.googleapis.com/ml-pipeline/release/0.1.20/kfp.tar.gz - -# Checkout kubeflow/testing because we use some of its utilities -RUN mkdir -p /src/kubeflow && \ - cd /src/kubeflow && \ - git clone https://github.com/kubeflow/testing.git testing - -USER jovyan diff --git a/xgboost_synthetic/build-train-deploy.ipynb b/xgboost_synthetic/build-train-deploy.ipynb index 2db84a32..25cd6b1e 100644 --- a/xgboost_synthetic/build-train-deploy.ipynb +++ b/xgboost_synthetic/build-train-deploy.ipynb @@ -6,43 +6,192 @@ "source": [ "# Train and deploy on Kubeflow from Notebooks\n", "\n", - "This notebook introduces you to using Kubeflow Fairing to train and deploy a model to Kubeflow on Google Kubernetes Engine (GKE), and Kubeflow Pipeline to build a simple pipeline and deploy on GKE. This notebook demonstrate how to:\n", + "This notebook shows you how to use Kubeflow to build, train, and deploy models on Kubernetes.\n", + "This notebook walks you through the following steps:\n", " \n", - "* Train an XGBoost model in a local notebook,\n", - "* Use Kubeflow Fairing to train an XGBoost model remotely on Kubeflow,\n", - " * For simplicity code-generated synthetic data is used.\n", - " * The append builder is used to rapidly build a docker image.\n", - "* Use Kubeflow Fairing to deploy a trained model to Kubeflow, and Call the deployed endpoint for predictions.\n", - "* Use a simple pipeline to train a model in GKE. \n", + "* Building an XGBoost model inside a notebook\n", + "* Training the model inside the notebook\n", + "* Performing inference using the model inside the notebook\n", + "* Using Kubeflow Fairing to launch training jobs on Kubernetes\n", + "* Using Kubeflow Fairing to build and deploy a model using [Seldon Core](https://www.seldon.io/)\n", + "* Using [Kubeflow metadata](https://github.com/kubeflow/metadata) to record metadata about your models\n", + "* Using [Kubeflow Pipelines](https://www.kubeflow.org/docs/pipelines/) to build a pipeline to train your model\n", "\n", - "To learn more about how to run this notebook locally, see the guide to [training and deploying on GCP from a local notebook][gcp-local-notebook].\n", + "## Prerequisites \n", "\n", - "[gcp-local-notebook]: https://kubeflow.org/docs/fairing/gcp/tutorials/gcp-local-notebook/" + "* This notebook assumes you are running inside a Kubeflow 0.6 deployment on GKE, created by following the [GKE instructions](https://www.kubeflow.org/docs/gke/deploy/)\n", + "* If you are running somewhere other than GKE you will need to modify the notebook to use a different docker registry or else configure Kubeflow to work with GCR." ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Verify we have a GCP account\n", "\n", "* The cell below checks that this notebook was spawned with credentials to access GCP\n", "* To add credentials, you should have selected the option to add a GCP credential when you created the notebook, as shown below\n", " ![add credential](images/addgcpsecret.png)\n" ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [], "source": [ "import os\n", "if not os.getenv(\"GOOGLE_APPLICATION_CREDENTIALS\"):\n", " raise ValueError(\"Notebook is missing google application credentials\")" ] },
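The cell above only verifies that the environment variable is set. As an optional follow-up (a sketch, not part of the original notebook), you can confirm the credentials actually load, assuming the `google-auth` package that the GCP client libraries depend on:

```python
# Optional sanity check (illustrative): load the service-account credentials
# that GOOGLE_APPLICATION_CREDENTIALS points at and report the GCP project.
import google.auth

credentials, project = google.auth.default()
print("Loaded credentials for project: {0}".format(project))
```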
\n", + "* Building an XGBoost model inside a notebook\n", + "* Training the model inside the notebook\n", + "* Performing inference using the model inside the notebook\n", + "* Using Kubeflow Fairing to launch training jobs on Kubernetes\n", + "* Using Kubeflow Fairing to build and deploy a model using [Seldon Core](https://www.seldon.io/)\n", + "* Using [Kubeflow metadata](https://github.com/kubeflow/metadata) to record metadata about your models\n", + "* Using [Kubeflow Pipelines](https://www.kubeflow.org/docs/pipelines/) to build a pipeline to train your model\n", "\n", - "To learn more about how to run this notebook locally, see the guide to [training and deploying on GCP from a local notebook][gcp-local-notebook].\n", + "## Prerequisites \n", "\n", - "[gcp-local-notebook]: https://kubeflow.org/docs/fairing/gcp/tutorials/gcp-local-notebook/" + "* This notebook assumes you are running inside 0.6 Kubeflow deployed on GKE following the [GKE instructions](https://www.kubeflow.org/docs/gke/deploy/)\n", + "* If you are running somewhere other than GKE you will need to modify the notebook to use a different docker registry or else configure Kubeflow to work with GCR." ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "## Set up your notebook for training an XGBoost model\n", + "### Verify we have a GCP account\n", + "\n", + "* The cell below checks that this notebook was spawned with credentials to access GCP\n", + "* To add credentials when you created the notebook you should have selected add gcp credential as shown below\n", + " ![add credential](images/addgcpsecret.png)\n" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [], + "source": [ + "import os\n", + "if not os.getenv(\"GOOGLE_APPLICATION_CREDENTIALS\"):\n", + " raise ValueError(\"Notebook is missing google application credentials\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Install Required Libraries\n", "\n", "Import the libraries required to train this model." 
] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 3, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Requirement already satisfied: retrying in /home/jovyan/.local/lib/python3.6/site-packages (1.3.3)\n", + "Requirement already satisfied: six>=1.7.0 in /opt/conda/lib/python3.6/site-packages (from retrying) (1.12.0)\n", + "\u001b[33mYou are using pip version 19.0.1, however version 19.2.2 is available.\n", + "You should consider upgrading via the 'pip install --upgrade pip' command.\u001b[0m\n" + ] + } + ], "source": [ - "!pip3 install retrying\n", - "!pip3 install fairing\n", - "!pip3 install kfmd" + "!pip3 install retrying" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "* Install a specific version of fairing that this example is tested against" ] }, { "cell_type": "code", - "execution_count": 32, + "execution_count": 4, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Collecting git+git://github.com/kubeflow/fairing.git@c6c075dece72135f5883abfe2a296894d74a2367\n", + " Cloning git://github.com/kubeflow/fairing.git (to revision c6c075dece72135f5883abfe2a296894d74a2367) to /tmp/pip-req-build-n8eh3_5o\n", + "Requirement already satisfied (use --upgrade to upgrade): fairing==0.5.3 from git+git://github.com/kubeflow/fairing.git@c6c075dece72135f5883abfe2a296894d74a2367 in /opt/conda/lib/python3.6/site-packages\n", + "Requirement already satisfied: docker>=3.4.1 in /opt/conda/lib/python3.6/site-packages (from fairing==0.5.3) (3.7.2)\n", + "Requirement already satisfied: notebook>=5.6.0 in /opt/conda/lib/python3.6/site-packages (from fairing==0.5.3) (5.7.8)\n", + "Requirement already satisfied: kubernetes>=9.0.0 in /opt/conda/lib/python3.6/site-packages (from fairing==0.5.3) (9.0.0)\n", + "Requirement already satisfied: future>=0.17.1 in /opt/conda/lib/python3.6/site-packages (from fairing==0.5.3) (0.17.1)\n", + "Requirement already satisfied: six>=1.11.0 in /opt/conda/lib/python3.6/site-packages (from fairing==0.5.3) (1.12.0)\n", + "Requirement already satisfied: google-cloud-storage>=1.13.2 in /opt/conda/lib/python3.6/site-packages (from fairing==0.5.3) (1.14.0)\n", + "Requirement already satisfied: requests>=2.21.0 in /opt/conda/lib/python3.6/site-packages (from fairing==0.5.3) (2.21.0)\n", + "Requirement already satisfied: setuptools>=34.0.0 in /opt/conda/lib/python3.6/site-packages (from fairing==0.5.3) (40.9.0)\n", + "Requirement already satisfied: google-auth>=1.6.2 in /opt/conda/lib/python3.6/site-packages (from fairing==0.5.3) (1.6.3)\n", + "Requirement already satisfied: httplib2>=0.12.0 in /opt/conda/lib/python3.6/site-packages (from fairing==0.5.3) (0.12.1)\n", + "Requirement already satisfied: oauth2client>=4.0.0 in /opt/conda/lib/python3.6/site-packages (from fairing==0.5.3) (4.1.3)\n", + "Requirement already satisfied: tornado<6.0.0,>=5.1.1 in /opt/conda/lib/python3.6/site-packages (from fairing==0.5.3) (5.1.1)\n", + "Requirement already satisfied: google-api-python-client>=1.7.8 in /opt/conda/lib/python3.6/site-packages (from fairing==0.5.3) (1.7.8)\n", + "Requirement already satisfied: cloudpickle>=0.8 in /opt/conda/lib/python3.6/site-packages (from fairing==0.5.3) (0.8.1)\n", + "Requirement already satisfied: numpy>=1.14 in /opt/conda/lib/python3.6/site-packages (from fairing==0.5.3) (1.16.2)\n", + "Requirement already satisfied: urllib3==1.24.2 in /opt/conda/lib/python3.6/site-packages (from fairing==0.5.3) 
(1.24.2)\n", + "Requirement already satisfied: boto3>=1.9.0 in /opt/conda/lib/python3.6/site-packages (from fairing==0.5.3) (1.9.210)\n", + "Requirement already satisfied: docker-pycreds>=0.4.0 in /opt/conda/lib/python3.6/site-packages (from docker>=3.4.1->fairing==0.5.3) (0.4.0)\n", + "Requirement already satisfied: websocket-client>=0.32.0 in /opt/conda/lib/python3.6/site-packages (from docker>=3.4.1->fairing==0.5.3) (0.56.0)\n", + "Requirement already satisfied: jinja2 in /opt/conda/lib/python3.6/site-packages (from notebook>=5.6.0->fairing==0.5.3) (2.10)\n", + "Requirement already satisfied: traitlets>=4.2.1 in /opt/conda/lib/python3.6/site-packages (from notebook>=5.6.0->fairing==0.5.3) (4.3.2)\n", + "Requirement already satisfied: ipython-genutils in /opt/conda/lib/python3.6/site-packages (from notebook>=5.6.0->fairing==0.5.3) (0.2.0)\n", + "Requirement already satisfied: pyzmq>=17 in /opt/conda/lib/python3.6/site-packages (from notebook>=5.6.0->fairing==0.5.3) (18.0.1)\n", + "Requirement already satisfied: jupyter-core>=4.4.0 in /opt/conda/lib/python3.6/site-packages (from notebook>=5.6.0->fairing==0.5.3) (4.4.0)\n", + "Requirement already satisfied: jupyter-client>=5.2.0 in /opt/conda/lib/python3.6/site-packages (from notebook>=5.6.0->fairing==0.5.3) (5.2.4)\n", + "Requirement already satisfied: nbconvert in /opt/conda/lib/python3.6/site-packages (from notebook>=5.6.0->fairing==0.5.3) (5.4.1)\n", + "Requirement already satisfied: Send2Trash in /opt/conda/lib/python3.6/site-packages (from notebook>=5.6.0->fairing==0.5.3) (1.5.0)\n", + "Requirement already satisfied: nbformat in /opt/conda/lib/python3.6/site-packages (from notebook>=5.6.0->fairing==0.5.3) (4.4.0)\n", + "Requirement already satisfied: terminado>=0.8.1 in /opt/conda/lib/python3.6/site-packages (from notebook>=5.6.0->fairing==0.5.3) (0.8.2)\n", + "Requirement already satisfied: ipykernel in /opt/conda/lib/python3.6/site-packages (from notebook>=5.6.0->fairing==0.5.3) (5.1.0)\n", + "Requirement already satisfied: prometheus-client in /opt/conda/lib/python3.6/site-packages (from notebook>=5.6.0->fairing==0.5.3) (0.6.0)\n", + "Requirement already satisfied: certifi>=14.05.14 in /opt/conda/lib/python3.6/site-packages (from kubernetes>=9.0.0->fairing==0.5.3) (2019.3.9)\n", + "Requirement already satisfied: python-dateutil>=2.5.3 in /opt/conda/lib/python3.6/site-packages (from kubernetes>=9.0.0->fairing==0.5.3) (2.8.0)\n", + "Requirement already satisfied: pyyaml>=3.12 in /opt/conda/lib/python3.6/site-packages (from kubernetes>=9.0.0->fairing==0.5.3) (5.1)\n", + "Requirement already satisfied: requests-oauthlib in /opt/conda/lib/python3.6/site-packages (from kubernetes>=9.0.0->fairing==0.5.3) (1.2.0)\n", + "Requirement already satisfied: google-api-core<2.0.0dev,>=1.6.0 in /opt/conda/lib/python3.6/site-packages (from google-cloud-storage>=1.13.2->fairing==0.5.3) (1.9.0)\n", + "Requirement already satisfied: google-resumable-media>=0.3.1 in /opt/conda/lib/python3.6/site-packages (from google-cloud-storage>=1.13.2->fairing==0.5.3) (0.3.2)\n", + "Requirement already satisfied: google-cloud-core<0.30dev,>=0.29.0 in /opt/conda/lib/python3.6/site-packages (from google-cloud-storage>=1.13.2->fairing==0.5.3) (0.29.1)\n", + "Requirement already satisfied: chardet<3.1.0,>=3.0.2 in /opt/conda/lib/python3.6/site-packages (from requests>=2.21.0->fairing==0.5.3) (3.0.4)\n", + "Requirement already satisfied: idna<2.9,>=2.5 in /opt/conda/lib/python3.6/site-packages (from requests>=2.21.0->fairing==0.5.3) (2.8)\n", + "Requirement already 
satisfied: cachetools>=2.0.0 in /opt/conda/lib/python3.6/site-packages (from google-auth>=1.6.2->fairing==0.5.3) (3.1.0)\n", + "Requirement already satisfied: rsa>=3.1.4 in /opt/conda/lib/python3.6/site-packages (from google-auth>=1.6.2->fairing==0.5.3) (4.0)\n", + "Requirement already satisfied: pyasn1-modules>=0.2.1 in /opt/conda/lib/python3.6/site-packages (from google-auth>=1.6.2->fairing==0.5.3) (0.2.4)\n", + "Requirement already satisfied: pyasn1>=0.1.7 in /opt/conda/lib/python3.6/site-packages (from oauth2client>=4.0.0->fairing==0.5.3) (0.4.5)\n", + "Requirement already satisfied: google-auth-httplib2>=0.0.3 in /opt/conda/lib/python3.6/site-packages (from google-api-python-client>=1.7.8->fairing==0.5.3) (0.0.3)\n", + "Requirement already satisfied: uritemplate<4dev,>=3.0.0 in /opt/conda/lib/python3.6/site-packages (from google-api-python-client>=1.7.8->fairing==0.5.3) (3.0.0)\n", + "Requirement already satisfied: botocore<1.13.0,>=1.12.210 in /opt/conda/lib/python3.6/site-packages (from boto3>=1.9.0->fairing==0.5.3) (1.12.210)\n", + "Requirement already satisfied: s3transfer<0.3.0,>=0.2.0 in /opt/conda/lib/python3.6/site-packages (from boto3>=1.9.0->fairing==0.5.3) (0.2.1)\n", + "Requirement already satisfied: jmespath<1.0.0,>=0.7.1 in /opt/conda/lib/python3.6/site-packages (from boto3>=1.9.0->fairing==0.5.3) (0.9.4)\n", + "Requirement already satisfied: MarkupSafe>=0.23 in /opt/conda/lib/python3.6/site-packages (from jinja2->notebook>=5.6.0->fairing==0.5.3) (1.1.1)\n", + "Requirement already satisfied: decorator in /opt/conda/lib/python3.6/site-packages (from traitlets>=4.2.1->notebook>=5.6.0->fairing==0.5.3) (4.4.0)\n", + "Requirement already satisfied: defusedxml in /opt/conda/lib/python3.6/site-packages (from nbconvert->notebook>=5.6.0->fairing==0.5.3) (0.5.0)\n", + "Requirement already satisfied: testpath in /opt/conda/lib/python3.6/site-packages (from nbconvert->notebook>=5.6.0->fairing==0.5.3) (0.4.2)\n", + "Requirement already satisfied: entrypoints>=0.2.2 in /opt/conda/lib/python3.6/site-packages (from nbconvert->notebook>=5.6.0->fairing==0.5.3) (0.3)\n", + "Requirement already satisfied: pygments in /opt/conda/lib/python3.6/site-packages (from nbconvert->notebook>=5.6.0->fairing==0.5.3) (2.3.1)\n", + "Requirement already satisfied: mistune>=0.8.1 in /opt/conda/lib/python3.6/site-packages (from nbconvert->notebook>=5.6.0->fairing==0.5.3) (0.8.4)\n", + "Requirement already satisfied: bleach in /opt/conda/lib/python3.6/site-packages (from nbconvert->notebook>=5.6.0->fairing==0.5.3) (3.1.0)\n", + "Requirement already satisfied: pandocfilters>=1.4.1 in /opt/conda/lib/python3.6/site-packages (from nbconvert->notebook>=5.6.0->fairing==0.5.3) (1.4.2)\n", + "Requirement already satisfied: jsonschema!=2.5.0,>=2.4 in /opt/conda/lib/python3.6/site-packages (from nbformat->notebook>=5.6.0->fairing==0.5.3) (3.0.1)\n", + "Requirement already satisfied: ptyprocess; os_name != \"nt\" in /opt/conda/lib/python3.6/site-packages (from terminado>=0.8.1->notebook>=5.6.0->fairing==0.5.3) (0.6.0)\n", + "Requirement already satisfied: ipython>=5.0.0 in /opt/conda/lib/python3.6/site-packages (from ipykernel->notebook>=5.6.0->fairing==0.5.3) (7.4.0)\n", + "Requirement already satisfied: oauthlib>=3.0.0 in /opt/conda/lib/python3.6/site-packages (from requests-oauthlib->kubernetes>=9.0.0->fairing==0.5.3) (3.0.1)\n", + "Requirement already satisfied: pytz in /opt/conda/lib/python3.6/site-packages (from google-api-core<2.0.0dev,>=1.6.0->google-cloud-storage>=1.13.2->fairing==0.5.3) (2018.9)\n", + 
"Requirement already satisfied: protobuf>=3.4.0 in /opt/conda/lib/python3.6/site-packages (from google-api-core<2.0.0dev,>=1.6.0->google-cloud-storage>=1.13.2->fairing==0.5.3) (3.7.1)\n", + "Requirement already satisfied: googleapis-common-protos!=1.5.4,<2.0dev,>=1.5.3 in /opt/conda/lib/python3.6/site-packages (from google-api-core<2.0.0dev,>=1.6.0->google-cloud-storage>=1.13.2->fairing==0.5.3) (1.5.9)\n", + "Requirement already satisfied: docutils<0.16,>=0.10 in /opt/conda/lib/python3.6/site-packages (from botocore<1.13.0,>=1.12.210->boto3>=1.9.0->fairing==0.5.3) (0.15.2)\n", + "Requirement already satisfied: webencodings in /opt/conda/lib/python3.6/site-packages (from bleach->nbconvert->notebook>=5.6.0->fairing==0.5.3) (0.5.1)\n", + "Requirement already satisfied: attrs>=17.4.0 in /opt/conda/lib/python3.6/site-packages (from jsonschema!=2.5.0,>=2.4->nbformat->notebook>=5.6.0->fairing==0.5.3) (19.1.0)\n", + "Requirement already satisfied: pyrsistent>=0.14.0 in /opt/conda/lib/python3.6/site-packages (from jsonschema!=2.5.0,>=2.4->nbformat->notebook>=5.6.0->fairing==0.5.3) (0.14.11)\n", + "Requirement already satisfied: prompt-toolkit<2.1.0,>=2.0.0 in /opt/conda/lib/python3.6/site-packages (from ipython>=5.0.0->ipykernel->notebook>=5.6.0->fairing==0.5.3) (2.0.9)\n", + "Requirement already satisfied: pexpect; sys_platform != \"win32\" in /opt/conda/lib/python3.6/site-packages (from ipython>=5.0.0->ipykernel->notebook>=5.6.0->fairing==0.5.3) (4.6.0)\n", + "Requirement already satisfied: backcall in /opt/conda/lib/python3.6/site-packages (from ipython>=5.0.0->ipykernel->notebook>=5.6.0->fairing==0.5.3) (0.1.0)\n", + "Requirement already satisfied: jedi>=0.10 in /opt/conda/lib/python3.6/site-packages (from ipython>=5.0.0->ipykernel->notebook>=5.6.0->fairing==0.5.3) (0.13.3)\n", + "Requirement already satisfied: pickleshare in /opt/conda/lib/python3.6/site-packages (from ipython>=5.0.0->ipykernel->notebook>=5.6.0->fairing==0.5.3) (0.7.5)\n", + "Requirement already satisfied: wcwidth in /opt/conda/lib/python3.6/site-packages (from prompt-toolkit<2.1.0,>=2.0.0->ipython>=5.0.0->ipykernel->notebook>=5.6.0->fairing==0.5.3) (0.1.7)\n", + "Requirement already satisfied: parso>=0.3.0 in /opt/conda/lib/python3.6/site-packages (from jedi>=0.10->ipython>=5.0.0->ipykernel->notebook>=5.6.0->fairing==0.5.3) (0.4.0)\n", + "Building wheels for collected packages: fairing\n", + " Building wheel for fairing (setup.py) ... 
\u001b[?25ldone\n", + "\u001b[?25h Stored in directory: /tmp/pip-ephem-wheel-cache-nw9r0vn2/wheels/bf/3f/0f/c65ae27dc4acd9443a98bfe546ad571e9a2d9f05905274ae44\n", + "Successfully built fairing\n", + "\u001b[33mYou are using pip version 19.0.1, however version 19.2.2 is available.\n", + "You should consider upgrading via the 'pip install --upgrade pip' command.\u001b[0m\n" + ] + } + ], + "source": [ + "!pip3 install git+git://github.com/kubeflow/fairing.git@c6c075dece72135f5883abfe2a296894d74a2367" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "* Perform some notebook setup" + ] + }, + { + "cell_type": "code", + "execution_count": 5, "metadata": { "scrolled": false }, @@ -52,12 +201,20 @@ "from pathlib import Path\n", "import os\n", "\n", - "util.notebook_setup()\n" + "util.notebook_setup()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "* Import the python libraries we will use\n", + "* We add a comment \"fairing:include-cell\" to tell the fairing preprocessor to keep this cell when converting to python code later" ] }, { "cell_type": "code", - "execution_count": 33, + "execution_count": 6, "metadata": {}, "outputs": [], "source": [ @@ -80,12 +237,16 @@ "from importlib import reload\n", "from sklearn.datasets import make_regression\n", "from kfmd import metadata\n", - "from datetime import datetime\n" + "from kfmd import openapi_client\n", + "from kfmd.openapi_client import Configuration, ApiClient, MetadataServiceApi\n", + "from datetime import datetime\n", + "import retrying\n", + "import urllib3" ] }, { "cell_type": "code", - "execution_count": 34, + "execution_count": 7, "metadata": {}, "outputs": [], "source": [ @@ -102,9 +263,19 @@ "from fairing.preprocessors.converted_notebook import ConvertNotebookPreprocessorWithFire\n" ] }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Code to train and predict \n", + "\n", + "* In the cells below we define some functions to generate data and train a model\n", + "* These functions could just as easily be defined in a separate python module" + ] + }, { "cell_type": "code", - "execution_count": 35, + "execution_count": 8, "metadata": {}, "outputs": [], "source": [ @@ -127,7 +298,7 @@ }, { "cell_type": "code", - "execution_count": 36, + "execution_count": 9, "metadata": {}, "outputs": [], "source": [ @@ -161,32 +332,51 @@ "def save_model(model, model_file):\n", " \"\"\"Save XGBoost model for serving.\"\"\"\n", " joblib.dump(model, model_file)\n", - " logging.info(\"Model export success: %s\", model_file)" + " logging.info(\"Model export success: %s\", model_file)\n", + "\n", + "@retrying.retry(stop_max_delay=180000)\n", + "def wait_for_istio(address=\"metadata-service.kubeflow.svc.cluster.local:8080\"):\n", + " \"\"\"Wait until we can connect to the metadata service.\n", + " \n", + " When we launch a K8s pod we may not be able to connect to the metadata service immediately\n", + " because the ISTIO side car hasn't started.\n", + " \n", + " This function allows us to wait for a time specified up to stop_max_delay to see if the service\n", + " is ready. 
\n", + " \"\"\"\n", + " config = Configuration()\n", + " config.host = address\n", + " api_client = ApiClient(config)\n", + " client = MetadataServiceApi(api_client)\n", + "\n", + " client.list_artifacts2()\n", + " \n", + "def create_workspace():\n", + " return metadata.Workspace(\n", + " # Connect to metadata-service in namesapce kubeflow in k8s cluster.\n", + " backend_url_prefix=\"metadata-service.kubeflow.svc.cluster.local:8080\",\n", + " name=\"xgboost-synthetic\",\n", + " description=\"workspace for xgboost-synthetic artifacts and executions\")" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "Define various constants" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Define Train and Predict functions" + "## Wrap Training and Prediction in a class\n", + "\n", + "* In the cell below we wrap training and prediction in a class\n", + "* A class provides the structure we will need to eventually use fairing to launch separate training jobs and/or deploy the model on Kubernetes" ] }, { "cell_type": "code", - "execution_count": 37, + "execution_count": 10, "metadata": {}, "outputs": [], "source": [ "# fairing:include-cell\n", - "class ModelServe(object):\n", - " \n", + "class ModelServe(object): \n", " def __init__(self, model_file=None):\n", " self.n_estimators = 50\n", " self.learning_rate = 0.1\n", @@ -202,10 +392,14 @@ " print(\"model_file={0}\".format(self.model_file))\n", " \n", " self.model = None\n", + " self._workspace = None\n", " self.exec = self.create_execution()\n", "\n", " def train(self):\n", " (train_X, train_y), (test_X, test_y) = read_synthetic_input()\n", + " \n", + " # Here we use Kubeflow's metadata library to record information\n", + " # about the training run to Kubeflow's metadata store.\n", " self.exec.log_input(metadata.DataSet(\n", " description=\"xgboost synthetic data\",\n", " name=\"synthetic-data\",\n", @@ -221,6 +415,8 @@ " self.learning_rate)\n", "\n", " mae = eval_model(model, test_X, test_y)\n", + " \n", + " # Here we log metrics about the model to Kubeflow's metadata store.\n", " self.exec.log_output(metadata.Metrics(\n", " name=\"xgboost-synthetic-traing-eval\",\n", " owner=\"someone@kubeflow.org\",\n", @@ -247,29 +443,35 @@ " version=datetime.utcnow().isoformat(\"T\")))\n", " \n", " def predict(self, X, feature_names):\n", - " \"\"\"Predict using the model for given ndarray.\"\"\"\n", + " \"\"\"Predict using the model for given ndarray.\n", + " \n", + " The predict signature should match the syntax expected by Seldon Core\n", + " https://github.com/SeldonIO/seldon-core so that we can use\n", + " Seldon h to wrap it a model server and deploy it on Kubernetes\n", + " \"\"\"\n", " if not self.model:\n", " self.model = joblib.load(self.model_file)\n", " # Do any preprocessing\n", " prediction = self.model.predict(data=X)\n", " # Do any postprocessing\n", " return [[prediction.item(0), prediction.item(1)]]\n", + "\n", + " @property\n", + " def workspace(self):\n", + " if not self._workspace:\n", + " wait_for_istio()\n", + " self._workspace = create_workspace()\n", + " return self._workspace\n", " \n", - " def create_execution(self):\n", - " workspace = metadata.Workspace(\n", - " # Connect to metadata-service in namesapce kubeflow in k8s cluster.\n", - " backend_url_prefix=\"metadata-service.kubeflow:8080\",\n", - " name=\"xgboost-synthetic\",\n", - " description=\"workspace for xgboost-synthetic artifacts and executions\")\n", - " \n", + " def create_execution(self): \n", " r = metadata.Run(\n", - " 
{ "cell_type": "markdown", "metadata": {}, "source": [ - "Define various constants" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Define Train and Predict functions" + "## Wrap Training and Prediction in a class\n", + "\n", + "* In the cell below we wrap training and prediction in a class\n", + "* A class provides the structure we will need to eventually use fairing to launch separate training jobs and/or deploy the model on Kubernetes" ] }, { "cell_type": "code", - "execution_count": 37, + "execution_count": 10, "metadata": {}, "outputs": [], "source": [ "# fairing:include-cell\n", - "class ModelServe(object):\n", - " \n", + "class ModelServe(object): \n", " def __init__(self, model_file=None):\n", " self.n_estimators = 50\n", " self.learning_rate = 0.1\n", @@ -202,10 +392,14 @@ " print(\"model_file={0}\".format(self.model_file))\n", " \n", " self.model = None\n", + " self._workspace = None\n", " self.exec = self.create_execution()\n", "\n", " def train(self):\n", " (train_X, train_y), (test_X, test_y) = read_synthetic_input()\n", + " \n", + " # Here we use Kubeflow's metadata library to record information\n", + " # about the training run to Kubeflow's metadata store.\n", " self.exec.log_input(metadata.DataSet(\n", " description=\"xgboost synthetic data\",\n", " name=\"synthetic-data\",\n", @@ -221,6 +415,8 @@ " self.learning_rate)\n", "\n", " mae = eval_model(model, test_X, test_y)\n", + " \n", + " # Here we log metrics about the model to Kubeflow's metadata store.\n", " self.exec.log_output(metadata.Metrics(\n", " name=\"xgboost-synthetic-traing-eval\",\n", " owner=\"someone@kubeflow.org\",\n", @@ -247,29 +443,35 @@ " version=datetime.utcnow().isoformat(\"T\")))\n", " \n", " def predict(self, X, feature_names):\n", - " \"\"\"Predict using the model for given ndarray.\"\"\"\n", + " \"\"\"Predict using the model for given ndarray.\n", + " \n", + " The predict signature should match the syntax expected by Seldon Core\n", + " https://github.com/SeldonIO/seldon-core so that we can use\n", + " Seldon to wrap it in a model server and deploy it on Kubernetes\n", + " \"\"\"\n", " if not self.model:\n", " self.model = joblib.load(self.model_file)\n", " # Do any preprocessing\n", " prediction = self.model.predict(data=X)\n", " # Do any postprocessing\n", " return [[prediction.item(0), prediction.item(1)]]\n", "\n", + " @property\n", + " def workspace(self):\n", + " if not self._workspace:\n", + " wait_for_istio()\n", + " self._workspace = create_workspace()\n", + " return self._workspace\n", " \n", - " def create_execution(self):\n", - " workspace = metadata.Workspace(\n", - " # Connect to metadata-service in namesapce kubeflow in k8s cluster.\n", - " backend_url_prefix=\"metadata-service.kubeflow:8080\",\n", - " name=\"xgboost-synthetic\",\n", - " description=\"workspace for xgboost-synthetic artifacts and executions\")\n", - " \n", + " def create_execution(self): \n", " r = metadata.Run(\n", - " workspace=workspace,\n", + " workspace=self.workspace,\n", " name=\"xgboost-synthetic-faring-run\" + datetime.utcnow().isoformat(\"T\"),\n", " description=\"a notebook run\")\n", "\n", " return metadata.Execution(\n", " name = \"execution\" + datetime.utcnow().isoformat(\"T\"),\n", - " workspace=workspace,\n", + " workspace=self.workspace,\n", " run=r,\n", " description=\"execution for training xgboost-synthetic\")" ] }, @@ -280,12 +482,13 @@ "source": [ "## Train your Model Locally\n", "\n", - "* Train your model locally inside your notebook" + "* Train your model locally inside your notebook\n", + "* To train locally we just instantiate the ModelServe class and then call train" ] },
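For reference, the early-stopping behavior visible in the training output below (the "Will train until validation_0-rmse hasn't improved in 40 rounds" message) comes from XGBoost's eval_set/early_stopping_rounds mechanism. A hedged sketch of that setup, using the xgboost scikit-learn API; the notebook's actual helper is the train_model function defined earlier:

```python
import xgboost as xgb

def sketch_train(train_X, train_y, test_X, test_y, n_estimators=50, learning_rate=0.1):
    """Illustrative only: mirrors the eval/early-stopping setup, not the notebook's exact helper."""
    model = xgb.XGBRegressor(n_estimators=n_estimators, learning_rate=learning_rate)
    # eval_set produces the validation_0-rmse trace; early_stopping_rounds=40
    # stops training if the metric fails to improve for 40 rounds.
    model.fit(train_X, train_y,
              eval_set=[(test_X, test_y)],
              early_stopping_rounds=40)
    return model
```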
"[17]\tvalidation_0-rmse:59.987\n", + "[18]\tvalidation_0-rmse:58.5032\n", + "[19]\tvalidation_0-rmse:57.2869\n", + "[20]\tvalidation_0-rmse:56.1678\n", + "[21]\tvalidation_0-rmse:55.3422\n", + "[22]\tvalidation_0-rmse:54.2746\n", + "[23]\tvalidation_0-rmse:53.4287\n", + "[24]\tvalidation_0-rmse:52.5733\n", + "[25]\tvalidation_0-rmse:51.594\n", + "[26]\tvalidation_0-rmse:50.6855\n", + "[27]\tvalidation_0-rmse:50.0631\n", + "[28]\tvalidation_0-rmse:49.4712\n", + "[29]\tvalidation_0-rmse:48.9603\n", + "[30]\tvalidation_0-rmse:48.5284\n", + "[31]\tvalidation_0-rmse:47.9727\n", + "[32]\tvalidation_0-rmse:47.2481\n", + "[33]\tvalidation_0-rmse:46.5567\n", + "[34]\tvalidation_0-rmse:46.1378\n", + "[35]\tvalidation_0-rmse:45.2184\n", + "[36]\tvalidation_0-rmse:44.5527\n", + "[37]\tvalidation_0-rmse:44.2344\n", + "[38]\tvalidation_0-rmse:43.9775\n", + "[39]\tvalidation_0-rmse:43.398\n", + "[40]\tvalidation_0-rmse:43.0096\n", + "[41]\tvalidation_0-rmse:42.3342\n", + "[42]\tvalidation_0-rmse:42.0125\n", + "[43]\tvalidation_0-rmse:41.7512\n", + "[44]\tvalidation_0-rmse:41.3573\n", + "[45]\tvalidation_0-rmse:41.1225\n", + "[46]\tvalidation_0-rmse:40.9997\n", + "[47]\tvalidation_0-rmse:40.736\n", + "[48]\tvalidation_0-rmse:40.498\n", + "[49]\tvalidation_0-rmse:40.0949\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "mean_absolute_error=41.16\n", + "mean_absolute_error=31.81\n", "Model export success: mockup-model.dat\n" ] }, @@ -358,12 +561,13 @@ "name": "stdout", "output_type": "stream", "text": [ - "Best RMSE on eval: %.2f with %d rounds 53.609386 50\n" + "Best RMSE on eval: %.2f with %d rounds 40.094906 50\n" ] } ], "source": [ - "ModelServe(model_file=\"mockup-model.dat\").train()" + "model = ModelServe(model_file=\"mockup-model.dat\")\n", + "model.train()" ] }, { @@ -372,12 +576,13 @@ "source": [ "## Predict locally\n", "\n", - "* Run prediction inside the notebook using the newly created notebook" + "* Run prediction inside the notebook using the newly created model\n", + "* To run prediction we just invoke redict" ] }, { "cell_type": "code", - "execution_count": 8, + "execution_count": 12, "metadata": {}, "outputs": [ { @@ -385,17 +590,16 @@ "output_type": "stream", "text": [ "model_file not supplied; using the default\n", - "model_file=mockup-model.dat\n", - "[14:45:28] WARNING: /workspace/src/objective/regression_obj.cu:152: reg:linear is now deprecated in favor of reg:squarederror.\n" + "model_file=mockup-model.dat\n" ] }, { "data": { "text/plain": [ - "[[68.33491516113281, 68.33491516113281]]" + "[[14.064239501953125, 11.37496566772461]]" ] }, - "execution_count": 8, + "execution_count": 12, "metadata": {}, "output_type": "execute_result" } @@ -410,43 +614,38 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "## Use Fairing to Launch a K8s Job to train your model" + "## Use Fairing to Launch a K8s Job to train your model\n", + "\n", + "* Now that we have trained a model locally we can use fairing to\n", + " 1. Launch a Kubernetes job to train the model\n", + " 1. 
@@ -410,43 +614,38 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "## Use Fairing to Launch a K8s Job to train your model" + "## Use Fairing to Launch a K8s Job to train your model\n", + "\n", + "* Now that we have trained a model locally we can use fairing to:\n", + " 1. Launch a Kubernetes job to train the model\n", + " 1. Deploy the model on Kubernetes\n", + "* Launching a separate Kubernetes job to train the model has the following advantages:\n", + "\n", + " * You can leverage Kubernetes to run multiple training jobs in parallel \n", + " * You can run long running jobs without blocking your kernel" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "### Set up Kubeflow Fairing for training and predictions\n", + "### Configure The Docker Registry For Fairing\n", "\n", - "Import the `fairing` library and configure the environment that your training or prediction job will run in." + "* In order to build docker images from your notebook you need a docker registry where the images will be stored\n", + "* Below you set some variables specifying a [GCR container registry](https://cloud.google.com/container-registry/docs/)\n", + "* Fairing provides a utility function to guess the name of your GCP project" ] }, { "cell_type": "code", - "execution_count": 9, + "execution_count": 13, "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "issue-label-bot-dev\n", - "gcr.io/issue-label-bot-dev/fairing-job\n" - ] - } - ], + "outputs": [], "source": [ "# Setting up google container repositories (GCR) for storing output containers\n", "# You can use any docker container registry instead of GCR\n", "GCP_PROJECT = fairing.cloud.gcp.guess_project_name()\n", - "print(GCP_PROJECT)\n", - "DOCKER_REGISTRY = 'gcr.io/{}/fairing-job'.format(GCP_PROJECT)\n", - "print(DOCKER_REGISTRY)\n", - "PY_VERSION = \".\".join([str(x) for x in sys.version_info[0:3]])\n", - "BASE_IMAGE = 'python:{}'.format(PY_VERSION)\n", - "# ucan use Dockerfile in this repo to build and use the base_image\n", - "base_image = \"gcr.io/kubeflow-images-public/xgboost-fairing-example-base:v-20190612\"\n" + "DOCKER_REGISTRY = 'gcr.io/{}/fairing-job'.format(GCP_PROJECT)" ] }, { @@ -455,21 +654,38 @@ "source": [ "## Use fairing to build the docker image\n", "\n", - "* This uses the append builder to rapidly build docker images" + "* First you will use fairing's kaniko builder to build a docker image that includes all your dependencies\n", + " * You use kaniko because you want to be able to run `pip` to install dependencies\n", + " * Kaniko gives you the flexibility to build images from Dockerfiles\n", + "* Kaniko, however, can be slow\n", + "* So you will build a base image with Kaniko once, and then every time your code changes you will just build an image\n", + " starting from your base image and adding your code to it\n", + "* You use the fairing append builder to enable these fast rebuilds" ] }, { "cell_type": "code", - "execution_count": 10, + "execution_count": 14, "metadata": {}, "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "Converting build-train-deploy.ipynb to build-train-deploy.py\n", + "Creating entry point for the class name ModelServe\n" + ] + }, { "data": { "text/plain": [ - "[PosixPath('build-train-deploy.py'), 'xgboost_util.py', 'mockup-model.dat']" + "[PosixPath('build-train-deploy.py'),\n", + " 'requirements.txt',\n", + " 'xgboost_util.py',\n", + " 'mockup-model.dat']" ] }, - "execution_count": 10, + "execution_count": 14, "metadata": {}, "output_type": "execute_result" } ], "source": [ "preprocessor = ConvertNotebookPreprocessorWithFire(\"ModelServe\")\n", "\n", "if not preprocessor.input_files:\n", " preprocessor.input_files = set()\n", - "input_files=[\"xgboost_util.py\", \"mockup-model.dat\"]\n", + "input_files=[\"xgboost_util.py\", \"mockup-model.dat\", \"requirements.txt\"]\n", "preprocessor.input_files = set([os.path.normpath(f) for f in 
input_files])\n", "preprocessor.preprocess()" ] }, { - "cell_type": "code", - "execution_count": 11, + "cell_type": "markdown", "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "Building image using cluster builder.\n", - "Creating docker context: /tmp/fairing_context_5d629kor\n", - "Waiting for fairing-builder-lz9zx to start...\n", - "Pod started running True\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\u001b[36mINFO\u001b[0m[0000] Downloading base image gcr.io/kubeflow-images-public/xgboost-fairing-example-base:v-20190612\n", - "\u001b[36mINFO\u001b[0m[0000] Downloading base image gcr.io/kubeflow-images-public/xgboost-fairing-example-base:v-20190612\n", - "\u001b[33mWARN\u001b[0m[0000] Error while retrieving image from cache: getting image from path: open /cache/sha256:f90e54e312c4cfba28bec6993add2a85b4e127b52149ec0aaf41e5f8889a4086: no such file or directory\n", - "\u001b[36mINFO\u001b[0m[0000] Checking for cached layer gcr.io/issue-label-bot-dev/fairing-job/fairing-job/cache:e46cfa04f5f0d0445ce3ce8b91886d94e96f2875510a69aa9afaeb0ba9e62fc4...\n", - "\u001b[36mINFO\u001b[0m[0000] Using caching version of cmd: RUN if [ -e requirements.txt ];then pip install --no-cache -r requirements.txt; fi\n", - "\u001b[36mINFO\u001b[0m[0000] Using files from context: [/kaniko/buildcontext/app]\n", - "\u001b[36mINFO\u001b[0m[0000] Taking snapshot of full filesystem...\n", - "\u001b[36mINFO\u001b[0m[0000] Skipping paths under /dev, as it is a whitelisted directory\n", - "\u001b[36mINFO\u001b[0m[0000] Skipping paths under /etc/secrets, as it is a whitelisted directory\n", - "\u001b[36mINFO\u001b[0m[0000] Skipping paths under /kaniko, as it is a whitelisted directory\n", - "\u001b[36mINFO\u001b[0m[0000] Skipping paths under /proc, as it is a whitelisted directory\n", - "\u001b[36mINFO\u001b[0m[0000] Skipping paths under /sys, as it is a whitelisted directory\n", - "\u001b[36mINFO\u001b[0m[0000] Skipping paths under /var/run, as it is a whitelisted directory\n", - "\u001b[36mINFO\u001b[0m[0000] WORKDIR /app/\n", - "\u001b[36mINFO\u001b[0m[0000] cmd: workdir\n", - "\u001b[36mINFO\u001b[0m[0000] Changed working directory to /app/\n", - "\u001b[36mINFO\u001b[0m[0000] Creating directory /app/\n", - "\u001b[36mINFO\u001b[0m[0000] Taking snapshot of files...\n", - "\u001b[36mINFO\u001b[0m[0000] ENV FAIRING_RUNTIME 1\n", - "\u001b[36mINFO\u001b[0m[0000] No files changed in this command, skipping snapshotting.\n", - "\u001b[36mINFO\u001b[0m[0000] RUN if [ -e requirements.txt ];then pip install --no-cache -r requirements.txt; fi\n", - "\u001b[36mINFO\u001b[0m[0000] Found cached layer, extracting to filesystem\n", - "\u001b[36mINFO\u001b[0m[0001] No files changed in this command, skipping snapshotting.\n", - "\u001b[36mINFO\u001b[0m[0001] Using files from context: [/kaniko/buildcontext/app]\n", - "\u001b[36mINFO\u001b[0m[0001] COPY /app/ /app/\n", - "\u001b[36mINFO\u001b[0m[0001] Taking snapshot of files...\n", - "2019/07/18 21:45:45 existing blob: sha256:d13453f7d2b8d0adfd86c3989a5b695cef5afc3efaafe559643071f258c9f06d\n", - "2019/07/18 21:45:45 existing blob: sha256:0ba512db704a2eb85f7f372d1c809d58589531e3bae794f0aaba86cee912f923\n", - "2019/07/18 21:45:45 existing blob: sha256:9ee379bde91a3cecfb08d4189af0a2bcecc2da1c5102e49443088ccd7bd9abfa\n", - "2019/07/18 21:45:45 existing blob: sha256:507170ae8cfaca6cf2999295221d1324f1051fa15ba59e04dd7dafdc8de565bc\n", - "2019/07/18 21:45:45 existing blob: 
sha256:2f1ee468081da0ca09360c50281ed261d8b3fb01f664262c3f278d8619eb4e9a\n", - "2019/07/18 21:45:45 existing blob: sha256:d099b15c53311dc296426716edabe61dcc19e88009c19098b17ba965357c4391\n", - "2019/07/18 21:45:45 existing blob: sha256:bad6918fba4b1c68f82d1a4b6063b3ce64975a73b33b38b35454b1d484a6b57b\n", - "2019/07/18 21:45:45 existing blob: sha256:0fd02182c40eb28e13c4d7efd5dd4c81d985d9b07c9c809cc26e7bdb2dced07e\n", - "2019/07/18 21:45:45 existing blob: sha256:079dd3e30fa3eed702bb20a2f725da9907e2732bdc4dfb2fb5084a3423c3f743\n", - "2019/07/18 21:45:45 existing blob: sha256:e7fea64fabbc6f5961864ce5c6bcc143ab616d325b0c5a26848d8e427806104f\n", - "2019/07/18 21:45:45 existing blob: sha256:a5ba9de0ac70b35658f5898c27b52063a597d790308fb853021e881e04a6efb7\n", - "2019/07/18 21:45:45 existing blob: sha256:124c757242f88002a858c23fc79f8262f9587fa30fd92507e586ad074afb42b6\n", - "2019/07/18 21:45:45 existing blob: sha256:bbf0f5f91e8108d9b0be1ceeb749e63788ce7394a184bc8a70d24017eca7b7ba\n", - "2019/07/18 21:45:45 existing blob: sha256:9d866f8bde2a0d607a6d17edc0fbd5e00b58306efc2b0a57e0ba72f269e7c6be\n", - "2019/07/18 21:45:45 existing blob: sha256:afde35469481d2bc446d649a7a3d099147bbf7696b66333e76a411686b617ea1\n", - "2019/07/18 21:45:45 existing blob: sha256:398d32b153e84fe343f0c5b07d65e89b05551aae6cb8b3a03bb2b662976eb3b8\n", - "2019/07/18 21:45:45 existing blob: sha256:55dbf73eb7c7c005c3ccff29b62ff180e2f29245d14794dd6d5d8ad855d0ea88\n", - "2019/07/18 21:45:45 existing blob: sha256:4bfa6a63a3897359eff3ca3ee27c2e05ba76b790a07e6583714c1d324c2d4f21\n", - "2019/07/18 21:45:45 existing blob: sha256:5d8a6f34a39a1e098f09b39ee4e9d4a178fef6ec71c2046fe0b040c4667c8143\n", - "2019/07/18 21:45:45 existing blob: sha256:b893ca5fa31bb87be0d3fa3a403dac7ca12c955d6fd522fd35e3260dbd0e99da\n", - "2019/07/18 21:45:45 existing blob: sha256:ecc17173ccb5b7692a6d31b0077b8e4f543fb45f8c2b5c252dcad9ad0c9be0f7\n", - "2019/07/18 21:45:45 existing blob: sha256:eed14867f5ee443ad7efc89d0d4392683799a413244feec120f43074bc2d43ef\n", - "2019/07/18 21:45:45 existing blob: sha256:07e06c833ecb3b115e378d7f2ba5817ba77cfd02f5794a9817ede0622fbbf8a5\n", - "2019/07/18 21:45:45 existing blob: sha256:541a15d3a9d79f7d3e5e0f552f396406b3e3093247f71e0ae71dd8b7242ec428\n", - "2019/07/18 21:45:45 existing blob: sha256:fa3f2f277e67c5cbbf1dac21dc27111a60d3cd2ef494d94aa1515d3319f2a245\n", - "2019/07/18 21:45:45 existing blob: sha256:8143617e89d7ba1957e3dc6d7093a48bd0cd4a2a709bc0c9d0ffc6dde11467e8\n", - "2019/07/18 21:45:45 existing blob: sha256:2327f2e2474891211dbf7fb2d54e16e7b2889fea157b726645cc05e75ad917e8\n", - "2019/07/18 21:45:45 existing blob: sha256:8c58e650bb886ab24426958165c15abe1a1c10e8710f50233701fd503e23e7ac\n", - "2019/07/18 21:45:45 existing blob: sha256:90a7e2cb4d7460e55f83c6e47f9f8d089895ee6e1cc51ae5c23eab3bdcb70363\n", - "2019/07/18 21:45:45 existing blob: sha256:1cf84c00b8903926c231b4b5974c0419556a4a578bf9416f585fcbf1b7aa70ab\n", - "2019/07/18 21:45:46 pushed blob sha256:8ab941f264e893bf2d02a0f6d2972fa5f725995cba85b0a897cee1531525bba1\n", - "2019/07/18 21:45:46 pushed blob sha256:acb611ba3316584866914521fe68dd9892e3fea865900f7c15f2f7268587cd93\n", - "2019/07/18 21:45:46 pushed blob sha256:80794aeb9ef80da69469ae895f20899b52d9115e4161543c83774863e97fc507\n", - "2019/07/18 21:45:47 gcr.io/issue-label-bot-dev/fairing-job/fairing-job:E480ACAF: digest: sha256:1c10c3629d920b78e54f16fe268eb77f976d1ff5a48b31a9f54df478ff012a2a size: 5468\n" - ] - } - ], "source": [ - "cluster_builder = cluster.cluster.ClusterBuilder(registry=DOCKER_REGISTRY,\n", - " 
base_image=base_image,\n", - " namespace='kubeflow',\n", - " preprocessor=preprocessor,\n", - " pod_spec_mutators=[fairing.cloud.gcp.add_gcp_credentials_if_exists],\n", - " context_source=cluster.gcs_context.GCSContextSource())\n", - "cluster_builder.build()" + "### Build the base image\n", + "\n", + "* You use cluster_builder to build the base image\n", + "* You only need to perform this again if we change our Docker image or the dependencies we need to install\n", + "* ClusterBuilder takes as input the DockerImage to use as a base image\n", + "* You should use the same Jupyter image that you are using for your notebook server so that your environment will be\n", + " the same when you launch Kubernetes jobs" ] }, { "cell_type": "code", - "execution_count": 12, + "execution_count": 15, "metadata": { "scrolled": true }, @@ -588,58 +725,177 @@ "name": "stderr", "output_type": "stream", "text": [ - "Building image using Append builder...\n", - "Creating docker context: /tmp/fairing_context_xpzlon_h\n", - "build-train-deploy.py already exists in Fairing context, skipping...\n", - "Loading Docker credentials for repository 'gcr.io/issue-label-bot-dev/fairing-job/fairing-job:E480ACAF'\n", - "Invoking 'docker-credential-gcloud' to obtain Docker credentials.\n", - "Successfully obtained Docker credentials.\n", - "Image successfully built in 1.2515304939588532s.\n", - "Pushing image gcr.io/issue-label-bot-dev/fairing-job/fairing-job:DA1D5CB0...\n", - "Loading Docker credentials for repository 'gcr.io/issue-label-bot-dev/fairing-job/fairing-job:DA1D5CB0'\n", - "Invoking 'docker-credential-gcloud' to obtain Docker credentials.\n", - "Successfully obtained Docker credentials.\n", - "Uploading gcr.io/issue-label-bot-dev/fairing-job/fairing-job:DA1D5CB0\n", - "Layer sha256:9d866f8bde2a0d607a6d17edc0fbd5e00b58306efc2b0a57e0ba72f269e7c6be exists, skipping\n", - "Layer sha256:124c757242f88002a858c23fc79f8262f9587fa30fd92507e586ad074afb42b6 exists, skipping\n", - "Layer sha256:bbf0f5f91e8108d9b0be1ceeb749e63788ce7394a184bc8a70d24017eca7b7ba exists, skipping\n", - "Layer sha256:e7fea64fabbc6f5961864ce5c6bcc143ab616d325b0c5a26848d8e427806104f exists, skipping\n", - "Layer sha256:d099b15c53311dc296426716edabe61dcc19e88009c19098b17ba965357c4391 exists, skipping\n", - "Layer sha256:079dd3e30fa3eed702bb20a2f725da9907e2732bdc4dfb2fb5084a3423c3f743 exists, skipping\n", - "Layer sha256:80794aeb9ef80da69469ae895f20899b52d9115e4161543c83774863e97fc507 exists, skipping\n", - "Layer sha256:eed14867f5ee443ad7efc89d0d4392683799a413244feec120f43074bc2d43ef exists, skipping\n", - "Layer sha256:55dbf73eb7c7c005c3ccff29b62ff180e2f29245d14794dd6d5d8ad855d0ea88 exists, skipping\n", - "Layer sha256:8ab941f264e893bf2d02a0f6d2972fa5f725995cba85b0a897cee1531525bba1 exists, skipping\n", - "Layer sha256:2327f2e2474891211dbf7fb2d54e16e7b2889fea157b726645cc05e75ad917e8 exists, skipping\n", - "Layer sha256:fa3f2f277e67c5cbbf1dac21dc27111a60d3cd2ef494d94aa1515d3319f2a245 exists, skipping\n", - "Layer sha256:afde35469481d2bc446d649a7a3d099147bbf7696b66333e76a411686b617ea1 exists, skipping\n", - "Layer sha256:d13453f7d2b8d0adfd86c3989a5b695cef5afc3efaafe559643071f258c9f06d exists, skipping\n", - "Layer sha256:2f1ee468081da0ca09360c50281ed261d8b3fb01f664262c3f278d8619eb4e9a exists, skipping\n", - "Layer sha256:8c58e650bb886ab24426958165c15abe1a1c10e8710f50233701fd503e23e7ac exists, skipping\n", - "Layer sha256:507170ae8cfaca6cf2999295221d1324f1051fa15ba59e04dd7dafdc8de565bc exists, skipping\n", - "Layer 
sha256:b893ca5fa31bb87be0d3fa3a403dac7ca12c955d6fd522fd35e3260dbd0e99da exists, skipping\n", - "Layer sha256:8143617e89d7ba1957e3dc6d7093a48bd0cd4a2a709bc0c9d0ffc6dde11467e8 exists, skipping\n", - "Layer sha256:1cf84c00b8903926c231b4b5974c0419556a4a578bf9416f585fcbf1b7aa70ab exists, skipping\n", - "Layer sha256:4bfa6a63a3897359eff3ca3ee27c2e05ba76b790a07e6583714c1d324c2d4f21 exists, skipping\n", - "Layer sha256:5d8a6f34a39a1e098f09b39ee4e9d4a178fef6ec71c2046fe0b040c4667c8143 exists, skipping\n", - "Layer sha256:0ba512db704a2eb85f7f372d1c809d58589531e3bae794f0aaba86cee912f923 exists, skipping\n", - "Layer sha256:a5ba9de0ac70b35658f5898c27b52063a597d790308fb853021e881e04a6efb7 exists, skipping\n", - "Layer sha256:bad6918fba4b1c68f82d1a4b6063b3ce64975a73b33b38b35454b1d484a6b57b exists, skipping\n", - "Layer sha256:0fd02182c40eb28e13c4d7efd5dd4c81d985d9b07c9c809cc26e7bdb2dced07e exists, skipping\n", - "Layer sha256:541a15d3a9d79f7d3e5e0f552f396406b3e3093247f71e0ae71dd8b7242ec428 exists, skipping\n", - "Layer sha256:ecc17173ccb5b7692a6d31b0077b8e4f543fb45f8c2b5c252dcad9ad0c9be0f7 exists, skipping\n", - "Layer sha256:07e06c833ecb3b115e378d7f2ba5817ba77cfd02f5794a9817ede0622fbbf8a5 exists, skipping\n", - "Layer sha256:9ee379bde91a3cecfb08d4189af0a2bcecc2da1c5102e49443088ccd7bd9abfa exists, skipping\n", - "Layer sha256:90a7e2cb4d7460e55f83c6e47f9f8d089895ee6e1cc51ae5c23eab3bdcb70363 exists, skipping\n", - "Layer sha256:398d32b153e84fe343f0c5b07d65e89b05551aae6cb8b3a03bb2b662976eb3b8 exists, skipping\n", - "Layer sha256:3885f9a80c70bf1aa3d3b925004fcca76334d45aa96d5e95412b40cae1dbdbba pushed.\n", - "Layer sha256:e94d45d512ce4033820c7df7dae67aa2d300528fed0ea5a53d6dcd099b2e4ca1 pushed.\n", - "Finished upload of: gcr.io/issue-label-bot-dev/fairing-job/fairing-job:DA1D5CB0\n", - "Pushed image gcr.io/issue-label-bot-dev/fairing-job/fairing-job:DA1D5CB0 in 3.6773080190178007s.\n" + "Building image using cluster builder.\n", + "Creating docker context: /tmp/fairing_context_vn0azn4y\n", + "Converting build-train-deploy.ipynb to build-train-deploy.py\n", + "Creating entry point for the class name ModelServe\n", + "Waiting for fairing-builder-ns5b5 to start...\n", + "Waiting for fairing-builder-ns5b5 to start...\n", + "Waiting for fairing-builder-ns5b5 to start...\n", + "Waiting for fairing-builder-ns5b5 to start...\n", + "Pod started running True\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\u001b[36mINFO\u001b[0m[0006] Downloading base image gcr.io/kubeflow-images-public/tensorflow-1.13.1-notebook-cpu:v0.5.0\n", + "\u001b[36mINFO\u001b[0m[0006] Downloading base image gcr.io/kubeflow-images-public/tensorflow-1.13.1-notebook-cpu:v0.5.0\n", + "\u001b[33mWARN\u001b[0m[0006] Error while retrieving image from cache: getting image from path: open /cache/sha256:5aaccf0267f085afd976342a8e943a9c6cefccef5b554df4e15fa7bf15cbd7a3: no such file or directory\n", + "\u001b[36mINFO\u001b[0m[0007] Using files from context: [/kaniko/buildcontext/app/requirements.txt]\n", + "\u001b[36mINFO\u001b[0m[0007] Checking for cached layer gcr.io/jlewi-dev/fairing-job/fairing-job/cache:488afeebb3bcde81c38dc99f3de2f4ba06b85302f8440b625e8fe1b08c53ef72...\n", + "\u001b[36mINFO\u001b[0m[0007] Using caching version of cmd: RUN if [ -e requirements.txt ];then pip install --no-cache -r requirements.txt; fi\n", + "\u001b[36mINFO\u001b[0m[0007] Using files from context: [/kaniko/buildcontext/app]\n", + "\u001b[36mINFO\u001b[0m[0007] Taking snapshot of full filesystem...\n", + "\u001b[36mINFO\u001b[0m[0007] 
Skipping paths under /dev, as it is a whitelisted directory\n", + "\u001b[36mINFO\u001b[0m[0007] Skipping paths under /etc/secrets, as it is a whitelisted directory\n", + "\u001b[36mINFO\u001b[0m[0007] Skipping paths under /kaniko, as it is a whitelisted directory\n", + "\u001b[36mINFO\u001b[0m[0007] Skipping paths under /proc, as it is a whitelisted directory\n", + "\u001b[36mINFO\u001b[0m[0007] Skipping paths under /sys, as it is a whitelisted directory\n", + "\u001b[36mINFO\u001b[0m[0007] Skipping paths under /var/run, as it is a whitelisted directory\n", + "\u001b[36mINFO\u001b[0m[0007] WORKDIR /app/\n", + "\u001b[36mINFO\u001b[0m[0007] cmd: workdir\n", + "\u001b[36mINFO\u001b[0m[0007] Changed working directory to /app/\n", + "\u001b[36mINFO\u001b[0m[0007] Creating directory /app/\n", + "\u001b[36mINFO\u001b[0m[0007] Taking snapshot of files...\n", + "\u001b[36mINFO\u001b[0m[0007] ENV FAIRING_RUNTIME 1\n", + "\u001b[36mINFO\u001b[0m[0007] No files changed in this command, skipping snapshotting.\n", + "\u001b[36mINFO\u001b[0m[0007] Using files from context: [/kaniko/buildcontext/app/requirements.txt]\n", + "\u001b[36mINFO\u001b[0m[0007] COPY /app//requirements.txt /app/\n", + "\u001b[36mINFO\u001b[0m[0007] Taking snapshot of files...\n", + "\u001b[36mINFO\u001b[0m[0007] RUN if [ -e requirements.txt ];then pip install --no-cache -r requirements.txt; fi\n", + "\u001b[36mINFO\u001b[0m[0007] Found cached layer, extracting to filesystem\n", + "\u001b[36mINFO\u001b[0m[0010] Taking snapshot of files...\n", + "\u001b[36mINFO\u001b[0m[0013] Using files from context: [/kaniko/buildcontext/app]\n", + "\u001b[36mINFO\u001b[0m[0013] COPY /app/ /app/\n", + "\u001b[36mINFO\u001b[0m[0013] Taking snapshot of files...\n", + "2019/08/18 00:32:59 existing blob: sha256:86db56dbcdfc4e5ba205e00f3de178548dd0fcd3d1d9ec011747ca0bb08a8177\n", + "2019/08/18 00:32:59 existing blob: sha256:afde35469481d2bc446d649a7a3d099147bbf7696b66333e76a411686b617ea1\n", + "2019/08/18 00:32:59 existing blob: sha256:398d32b153e84fe343f0c5b07d65e89b05551aae6cb8b3a03bb2b662976eb3b8\n", + "2019/08/18 00:32:59 existing blob: sha256:c451d20886c33c47dab7b01b05ece292ee5173a9a4aced925035401a6b1de62e\n", + "2019/08/18 00:32:59 existing blob: sha256:124c757242f88002a858c23fc79f8262f9587fa30fd92507e586ad074afb42b6\n", + "2019/08/18 00:32:59 existing blob: sha256:47956fc6abae87d70180bc4f0efdad014b8e2a3b617a447ac01f674336737dfc\n", + "2019/08/18 00:32:59 existing blob: sha256:fa3f2f277e67c5cbbf1dac21dc27111a60d3cd2ef494d94aa1515d3319f2a245\n", + "2019/08/18 00:32:59 existing blob: sha256:432f7fba907384de9a5c1c23aed93fa3eff7d6a8d89a91f5eab99f41aa889323\n", + "2019/08/18 00:32:59 existing blob: sha256:147c5bbff888fc9cddffd4078daa35bba0d1d6f6c7175a1acb144412a43b3fce\n", + "2019/08/18 00:32:59 existing blob: sha256:969fc9c5501e60432ca0bc4b635493feb2f90e14822d2f3e3f79742fed96757d\n", + "2019/08/18 00:32:59 existing blob: sha256:167108358fe643eea57fc595ff9b76a1a7e09e022c84d724346ce5b41d0148bc\n", + "2019/08/18 00:32:59 existing blob: sha256:8485e620dff15e8a69076ac02f6b23ffb3408161cdc2c0572905838765a84854\n", + "2019/08/18 00:32:59 existing blob: sha256:547e89bdafacadd9655a394a9d73c49c9890233c0cd244cbc5b1cb859be1395c\n", + "2019/08/18 00:32:59 existing blob: sha256:59951887a0c1d1a227f43219b3bc84562a6f2a7e0ab5c276fbd9eaba6ebec02d\n", + "2019/08/18 00:32:59 existing blob: sha256:ff51e784988b3a953df5d6ba36b982436c2b16a77eb081ce7a589ca67d04144c\n", + "2019/08/18 00:32:59 existing blob: 
sha256:9ad0c8331ed7f0f76b54d8e91e66661a3ca35e02a25cc83ccb48d51fa89e5573\n", + "2019/08/18 00:32:59 existing blob: sha256:9d866f8bde2a0d607a6d17edc0fbd5e00b58306efc2b0a57e0ba72f269e7c6be\n", + "2019/08/18 00:32:59 existing blob: sha256:9ab35225e174496943b6a86bf62d004409479cf722ef1d3e01ca48afc8cfaa79\n", + "2019/08/18 00:32:59 existing blob: sha256:22ea01b3a354ebdcf4386e6d2f53b6cf65bd9cdcb34a70f32e00b90a477589d0\n", + "2019/08/18 00:32:59 existing blob: sha256:62228d5c51598033083adbf71e8ee3d8d523d7d6d8c9d789b8c8a2d71ca988ac\n", + "2019/08/18 00:32:59 existing blob: sha256:bd5e67bf2947497b4a4347d2751797d6b3a40f0dc5d355185815ee6da1b8ae0c\n", + "2019/08/18 00:33:01 pushed blob sha256:1d9677f30f0f4a3989f410d16a6829b3953729bb1971eab37e89535308f9bf0a\n", + "2019/08/18 00:33:01 pushed blob sha256:d76c933128c526784913ade49f5bb957fcdbf340eac405af17aaa01cb9c21650\n", + "2019/08/18 00:33:01 pushed blob sha256:9e6ab0086905f54f7c11a49bff522f9b9f8a7b85f6837e1ab9db5c13abee190e\n", + "2019/08/18 00:33:01 pushed blob sha256:a11325fad344d6f0b31cd2d1c0ac2f693faf6b7724fe43b07f42ce3e51bac3f2\n", + "2019/08/18 00:33:05 pushed blob sha256:c5aa14b34cb84e23688babb6c0c39c1591a7dfd7660c719b9d63bdcb8d2aa172\n", + "2019/08/18 00:33:05 gcr.io/jlewi-dev/fairing-job/fairing-job:A806F356: digest: sha256:5998eae5f15aa375edf498cc58f2cd0a701e80ad766b2c6c707e1166db889e35 size: 4325\n" ] } ], "source": [ + "# Use a stock jupyter image as our base image\n", + "base_image = \"gcr.io/kubeflow-images-public/tensorflow-1.13.1-notebook-cpu:v0.5.0\"\n", + "\n", + "cluster_builder = cluster.cluster.ClusterBuilder(registry=DOCKER_REGISTRY,\n", + " base_image=base_image,\n", + " preprocessor=preprocessor,\n", + " pod_spec_mutators=[fairing.cloud.gcp.add_gcp_credentials_if_exists],\n", + " context_source=cluster.gcs_context.GCSContextSource())\n", + "cluster_builder.build()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Build the actual image\n", + "\n", + "Here you use the append builder to add your code to the base image\n", + "\n", + "* Calling preprocessor.preprocess() converts your notebook file to a python file\n", + "\n", + " * You are using the [ConvertNotebookPreprocessorWithFire](https://github.com/kubeflow/fairing/blob/master/fairing/preprocessors/converted_notebook.py#L85) \n", + " * This preprocessor converts ipynb files to py files by doing the following\n", + " 1. Removing all cells which don't have a comment `# fairing:include-cell`\n", + " 1. 
Using [python-fire](https://github.com/google/python-fire) to add entry points for the class specified in the constructor (see the sketch after this cell) \n", + " \n", + " * Calling preprocess() will create the file build-train-deploy.py\n", + " \n", + "* You use the AppendBuilder to rapidly build a new docker image by quickly adding some files to an existing docker image\n", + " * The AppendBuilder is super fast so it's very convenient for rebuilding your images as you iterate on your code\n", + " * The AppendBuilder will add the converted notebook, build-train-deploy.py, along with any files specified in `preprocessor.input_files` to `/app` in the newly created image" ] },
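To make the entry-point mechanism concrete, here is a minimal, self-contained sketch of the python-fire pattern the preprocessor relies on (the class here is hypothetical; the generated build-train-deploy.py wires up ModelServe for you):

```python
import fire

class Trainer(object):
    """Hypothetical stand-in for the class the preprocessor exposes."""
    def train(self):
        print("training...")

if __name__ == "__main__":
    # `python this_file.py train` now invokes Trainer.train, which is exactly
    # how the Kubernetes job command can select a method to run.
    fire.Fire(Trainer)
```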
sha256:86db56dbcdfc4e5ba205e00f3de178548dd0fcd3d1d9ec011747ca0bb08a8177 exists, skipping\n", + "Layer sha256:1d9677f30f0f4a3989f410d16a6829b3953729bb1971eab37e89535308f9bf0a exists, skipping\n", + "Layer sha256:62228d5c51598033083adbf71e8ee3d8d523d7d6d8c9d789b8c8a2d71ca988ac exists, skipping\n", + "Layer sha256:167108358fe643eea57fc595ff9b76a1a7e09e022c84d724346ce5b41d0148bc exists, skipping\n", + "Layer sha256:ff51e784988b3a953df5d6ba36b982436c2b16a77eb081ce7a589ca67d04144c exists, skipping\n", + "Layer sha256:22ea01b3a354ebdcf4386e6d2f53b6cf65bd9cdcb34a70f32e00b90a477589d0 exists, skipping\n", + "Layer sha256:969fc9c5501e60432ca0bc4b635493feb2f90e14822d2f3e3f79742fed96757d exists, skipping\n", + "Layer sha256:398d32b153e84fe343f0c5b07d65e89b05551aae6cb8b3a03bb2b662976eb3b8 exists, skipping\n", + "Layer sha256:547e89bdafacadd9655a394a9d73c49c9890233c0cd244cbc5b1cb859be1395c exists, skipping\n", + "Layer sha256:2db167b6d8ec97ba1ed7e5c1cdb4104ed1a1747d8e19e5d0962a18a08db3577b pushed.\n", + "Layer sha256:d4f27c1c9950aa9a334fdf2a43dddeb0dc51840af8c62d8f4eb7e979023eced9 pushed.\n", + "Finished upload of: gcr.io/jlewi-dev/fairing-job/fairing-job:A84D11BF\n", + "Pushed image gcr.io/jlewi-dev/fairing-job/fairing-job:A84D11BF in 2.6799020239850506s.\n" ] } ], "source": [ + "preprocessor.preprocess()\n", + "\n", "builder = append.append.AppendBuilder(registry=DOCKER_REGISTRY,\n", " base_image=cluster_builder.image_tag, preprocessor=preprocessor)\n", "builder.build()\n" @@ -651,93 +907,159 @@ "source": [ "## Launch the K8s Job\n", "\n", - "* Use pod mutators to attach a PVC and credentials to the pod" + "* You can use fairing to easily launch a [Kubernetes job](https://kubernetes.io/docs/concepts/workloads/controllers/jobs-run-to-completion/) to invoke code\n", + "* You use fairing's Kubernetes job library to build a Kubernetes job\n", + "  * You use pod mutators to attach GCP credentials to the pod\n", + "  * You can also use pod mutators to attach PVCs\n", + "* Since the [ConvertNotebookPreprocessorWithFire](https://github.com/kubeflow/fairing/blob/master/fairing/preprocessors/converted_notebook.py#L85) is using [python-fire](https://github.com/google/python-fire), you can easily invoke any method inside the ModelServe class just by configuring the command invoked by the Kubernetes job\n", + "  * In the cell below you extend the command to include `train` as an argument because you want to invoke the train\n", + "    function\n", + "    \n", + "**Note** When you invoke train_deployer.deploy, fairing will stream the logs from the Kubernetes job. The job will initially show some connection errors because the job will try to connect to the metadata server. 
You can ignore these errors; the job will retry until it's able to connect and then continue" ] }, { "cell_type": "code", - "execution_count": 19, + "execution_count": 17, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ - "WARNING:fairing.deployers.job.job:Training job fairing-job-t429t launched.\n", - "WARNING:fairing.kubernetes.manager:Waiting for fairing-job-t429t-xscgt to start...\n", - "WARNING:fairing.kubernetes.manager:Waiting for fairing-job-t429t-xscgt to start...\n", - "WARNING:fairing.kubernetes.manager:Waiting for fairing-job-t429t-xscgt to start...\n", - "INFO:fairing.kubernetes.manager:Pod started running True\n" + "The job fairing-job-ftbfh launched.\n", + "Waiting for fairing-job-ftbfh-tm7wg to start...\n", + "Waiting for fairing-job-ftbfh-tm7wg to start...\n", + "Waiting for fairing-job-ftbfh-tm7wg to start...\n", + "Waiting for fairing-job-ftbfh-tm7wg to start...\n", + "Waiting for fairing-job-ftbfh-tm7wg to start...\n", + "Pod started running True\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ + "Retrying (Retry(total=2, connect=None, read=None, redirect=None, status=None)) after connection broken by 'NewConnectionError(': Failed to establish a new connection: [Errno 111] Connection refused',)': /api/v1alpha1/artifacts\n", + "Retrying (Retry(total=1, connect=None, read=None, redirect=None, status=None)) after connection broken by 'NewConnectionError(': Failed to establish a new connection: [Errno 111] Connection refused',)': /api/v1alpha1/artifacts\n", + "Retrying (Retry(total=0, connect=None, read=None, redirect=None, status=None)) after connection broken by 'NewConnectionError(': Failed to establish a new connection: [Errno 111] Connection refused',)': /api/v1alpha1/artifacts\n", + "Retrying (Retry(total=2, connect=None, read=None, redirect=None, status=None)) after connection broken by 'NewConnectionError(': Failed to establish a new connection: [Errno 111] Connection refused',)': /api/v1alpha1/artifacts\n", + "Retrying (Retry(total=1, connect=None, read=None, redirect=None, status=None)) after connection broken by 'NewConnectionError(': Failed to establish a new connection: [Errno 111] Connection refused',)': /api/v1alpha1/artifacts\n", + "Retrying (Retry(total=0, connect=None, read=None, redirect=None, status=None)) after connection broken by 'NewConnectionError(': Failed to establish a new connection: [Errno 111] Connection refused',)': /api/v1alpha1/artifacts\n", + "Retrying (Retry(total=2, connect=None, read=None, redirect=None, status=None)) after connection broken by 'NewConnectionError(': Failed to establish a new connection: [Errno 111] Connection refused',)': /api/v1alpha1/artifacts\n", + "Retrying (Retry(total=1, connect=None, read=None, redirect=None, status=None)) after connection broken by 'NewConnectionError(': Failed to establish a new connection: [Errno 111] Connection refused',)': /api/v1alpha1/artifacts\n", + "Retrying (Retry(total=0, connect=None, read=None, redirect=None, status=None)) after connection broken by 'NewConnectionError(': Failed to establish a new connection: [Errno 111] Connection refused',)': /api/v1alpha1/artifacts\n", + "Retrying (Retry(total=2, connect=None, read=None, redirect=None, status=None)) after connection broken by 'NewConnectionError(': Failed to establish a new connection: [Errno 111] Connection refused',)': /api/v1alpha1/artifacts\n", + "Retrying (Retry(total=1, connect=None, read=None, redirect=None, status=None)) after connection broken by 'NewConnectionError(': 
Failed to establish a new connection: [Errno 111] Connection refused',)': /api/v1alpha1/artifacts\n", + "Retrying (Retry(total=0, connect=None, read=None, redirect=None, status=None)) after connection broken by 'NewConnectionError(': Failed to establish a new connection: [Errno 111] Connection refused',)': /api/v1alpha1/artifacts\n", + "Retrying (Retry(total=2, connect=None, read=None, redirect=None, status=None)) after connection broken by 'NewConnectionError(': Failed to establish a new connection: [Errno 111] Connection refused',)': /api/v1alpha1/artifacts\n", + "Retrying (Retry(total=1, connect=None, read=None, redirect=None, status=None)) after connection broken by 'NewConnectionError(': Failed to establish a new connection: [Errno 111] Connection refused',)': /api/v1alpha1/artifacts\n", + "Retrying (Retry(total=0, connect=None, read=None, redirect=None, status=None)) after connection broken by 'NewConnectionError(': Failed to establish a new connection: [Errno 111] Connection refused',)': /api/v1alpha1/artifacts\n", + "Retrying (Retry(total=2, connect=None, read=None, redirect=None, status=None)) after connection broken by 'NewConnectionError(': Failed to establish a new connection: [Errno 111] Connection refused',)': /api/v1alpha1/artifacts\n", + "Retrying (Retry(total=1, connect=None, read=None, redirect=None, status=None)) after connection broken by 'NewConnectionError(': Failed to establish a new connection: [Errno 111] Connection refused',)': /api/v1alpha1/artifacts\n", + "Retrying (Retry(total=0, connect=None, read=None, redirect=None, status=None)) after connection broken by 'NewConnectionError(': Failed to establish a new connection: [Errno 111] Connection refused',)': /api/v1alpha1/artifacts\n", + "Retrying (Retry(total=2, connect=None, read=None, redirect=None, status=None)) after connection broken by 'NewConnectionError(': Failed to establish a new connection: [Errno 111] Connection refused',)': /api/v1alpha1/artifacts\n", + "Retrying (Retry(total=1, connect=None, read=None, redirect=None, status=None)) after connection broken by 'NewConnectionError(': Failed to establish a new connection: [Errno 111] Connection refused',)': /api/v1alpha1/artifacts\n", + "Retrying (Retry(total=0, connect=None, read=None, redirect=None, status=None)) after connection broken by 'NewConnectionError(': Failed to establish a new connection: [Errno 111] Connection refused',)': /api/v1alpha1/artifacts\n", + "Retrying (Retry(total=2, connect=None, read=None, redirect=None, status=None)) after connection broken by 'NewConnectionError(': Failed to establish a new connection: [Errno 111] Connection refused',)': /api/v1alpha1/artifacts\n", + "Retrying (Retry(total=1, connect=None, read=None, redirect=None, status=None)) after connection broken by 'NewConnectionError(': Failed to establish a new connection: [Errno 111] Connection refused',)': /api/v1alpha1/artifacts\n", + "Retrying (Retry(total=0, connect=None, read=None, redirect=None, status=None)) after connection broken by 'NewConnectionError(': Failed to establish a new connection: [Errno 111] Connection refused',)': /api/v1alpha1/artifacts\n", + "Retrying (Retry(total=2, connect=None, read=None, redirect=None, status=None)) after connection broken by 'NewConnectionError(': Failed to establish a new connection: [Errno 111] Connection refused',)': /api/v1alpha1/artifacts\n", + "Retrying (Retry(total=1, connect=None, read=None, redirect=None, status=None)) after connection broken by 'NewConnectionError(': Failed to establish a new connection: [Errno 
111] Connection refused',)': /api/v1alpha1/artifacts\n", + "Retrying (Retry(total=0, connect=None, read=None, redirect=None, status=None)) after connection broken by 'NewConnectionError(': Failed to establish a new connection: [Errno 111] Connection refused',)': /api/v1alpha1/artifacts\n", + "Retrying (Retry(total=2, connect=None, read=None, redirect=None, status=None)) after connection broken by 'NewConnectionError(': Failed to establish a new connection: [Errno 111] Connection refused',)': /api/v1alpha1/artifacts\n", + "Retrying (Retry(total=1, connect=None, read=None, redirect=None, status=None)) after connection broken by 'NewConnectionError(': Failed to establish a new connection: [Errno 111] Connection refused',)': /api/v1alpha1/artifacts\n", + "Retrying (Retry(total=0, connect=None, read=None, redirect=None, status=None)) after connection broken by 'NewConnectionError(': Failed to establish a new connection: [Errno 111] Connection refused',)': /api/v1alpha1/artifacts\n", + "Retrying (Retry(total=2, connect=None, read=None, redirect=None, status=None)) after connection broken by 'NewConnectionError(': Failed to establish a new connection: [Errno 111] Connection refused',)': /api/v1alpha1/artifacts\n", + "Retrying (Retry(total=1, connect=None, read=None, redirect=None, status=None)) after connection broken by 'NewConnectionError(': Failed to establish a new connection: [Errno 111] Connection refused',)': /api/v1alpha1/artifacts\n", + "Retrying (Retry(total=0, connect=None, read=None, redirect=None, status=None)) after connection broken by 'NewConnectionError(': Failed to establish a new connection: [Errno 111] Connection refused',)': /api/v1alpha1/artifacts\n", + "Retrying (Retry(total=2, connect=None, read=None, redirect=None, status=None)) after connection broken by 'NewConnectionError(': Failed to establish a new connection: [Errno 111] Connection refused',)': /api/v1alpha1/artifacts\n", + "Retrying (Retry(total=1, connect=None, read=None, redirect=None, status=None)) after connection broken by 'NewConnectionError(': Failed to establish a new connection: [Errno 111] Connection refused',)': /api/v1alpha1/artifacts\n", + "Retrying (Retry(total=0, connect=None, read=None, redirect=None, status=None)) after connection broken by 'NewConnectionError(': Failed to establish a new connection: [Errno 111] Connection refused',)': /api/v1alpha1/artifacts\n", + "Retrying (Retry(total=2, connect=None, read=None, redirect=None, status=None)) after connection broken by 'NewConnectionError(': Failed to establish a new connection: [Errno 111] Connection refused',)': /api/v1alpha1/artifacts\n", + "Retrying (Retry(total=1, connect=None, read=None, redirect=None, status=None)) after connection broken by 'NewConnectionError(': Failed to establish a new connection: [Errno 111] Connection refused',)': /api/v1alpha1/artifacts\n", + "Retrying (Retry(total=0, connect=None, read=None, redirect=None, status=None)) after connection broken by 'NewConnectionError(': Failed to establish a new connection: [Errno 111] Connection refused',)': /api/v1alpha1/artifacts\n", + "Retrying (Retry(total=2, connect=None, read=None, redirect=None, status=None)) after connection broken by 'NewConnectionError(': Failed to establish a new connection: [Errno 111] Connection refused',)': /api/v1alpha1/artifacts\n", + "Retrying (Retry(total=1, connect=None, read=None, redirect=None, status=None)) after connection broken by 'NewConnectionError(': Failed to establish a new connection: [Errno 111] Connection refused',)': 
/api/v1alpha1/artifacts\n", + "Retrying (Retry(total=0, connect=None, read=None, redirect=None, status=None)) after connection broken by 'NewConnectionError(': Failed to establish a new connection: [Errno 111] Connection refused',)': /api/v1alpha1/artifacts\n", + "Retrying (Retry(total=2, connect=None, read=None, redirect=None, status=None)) after connection broken by 'NewConnectionError(': Failed to establish a new connection: [Errno 111] Connection refused',)': /api/v1alpha1/artifacts\n", + "Retrying (Retry(total=1, connect=None, read=None, redirect=None, status=None)) after connection broken by 'NewConnectionError(': Failed to establish a new connection: [Errno 111] Connection refused',)': /api/v1alpha1/artifacts\n", + "Retrying (Retry(total=0, connect=None, read=None, redirect=None, status=None)) after connection broken by 'NewConnectionError(': Failed to establish a new connection: [Errno 111] Connection refused',)': /api/v1alpha1/artifacts\n", + "Retrying (Retry(total=2, connect=None, read=None, redirect=None, status=None)) after connection broken by 'NewConnectionError(': Failed to establish a new connection: [Errno 111] Connection refused',)': /api/v1alpha1/artifacts\n", + "Retrying (Retry(total=1, connect=None, read=None, redirect=None, status=None)) after connection broken by 'NewConnectionError(': Failed to establish a new connection: [Errno 111] Connection refused',)': /api/v1alpha1/artifacts\n", + "Retrying (Retry(total=0, connect=None, read=None, redirect=None, status=None)) after connection broken by 'NewConnectionError(': Failed to establish a new connection: [Errno 111] Connection refused',)': /api/v1alpha1/artifacts\n", + "Retrying (Retry(total=2, connect=None, read=None, redirect=None, status=None)) after connection broken by 'NewConnectionError(': Failed to establish a new connection: [Errno 111] Connection refused',)': /api/v1alpha1/artifacts\n", + "Retrying (Retry(total=1, connect=None, read=None, redirect=None, status=None)) after connection broken by 'NewConnectionError(': Failed to establish a new connection: [Errno 111] Connection refused',)': /api/v1alpha1/artifacts\n", + "Retrying (Retry(total=0, connect=None, read=None, redirect=None, status=None)) after connection broken by 'NewConnectionError(': Failed to establish a new connection: [Errno 111] Connection refused',)': /api/v1alpha1/artifacts\n", + "Retrying (Retry(total=2, connect=None, read=None, redirect=None, status=None)) after connection broken by 'NewConnectionError(': Failed to establish a new connection: [Errno 111] Connection refused',)': /api/v1alpha1/artifacts\n", + "Retrying (Retry(total=1, connect=None, read=None, redirect=None, status=None)) after connection broken by 'NewConnectionError(': Failed to establish a new connection: [Errno 111] Connection refused',)': /api/v1alpha1/artifacts\n", + "Retrying (Retry(total=0, connect=None, read=None, redirect=None, status=None)) after connection broken by 'NewConnectionError(': Failed to establish a new connection: [Errno 111] Connection refused',)': /api/v1alpha1/artifacts\n", + "Retrying (Retry(total=2, connect=None, read=None, redirect=None, status=None)) after connection broken by 'NewConnectionError(': Failed to establish a new connection: [Errno 111] Connection refused',)': /api/v1alpha1/artifacts\n", + "Retrying (Retry(total=1, connect=None, read=None, redirect=None, status=None)) after connection broken by 'NewConnectionError(': Failed to establish a new connection: [Errno 111] Connection refused',)': /api/v1alpha1/artifacts\n", + "Retrying 
(Retry(total=0, connect=None, read=None, redirect=None, status=None)) after connection broken by 'NewConnectionError(': Failed to establish a new connection: [Errno 111] Connection refused',)': /api/v1alpha1/artifacts\n", + "Retrying (Retry(total=2, connect=None, read=None, redirect=None, status=None)) after connection broken by 'NewConnectionError(': Failed to establish a new connection: [Errno 111] Connection refused',)': /api/v1alpha1/artifacts\n", "model_file not supplied; using the default\n", "model_file=mockup-model.dat\n", - "[0]\tvalidation_0-rmse:90.6249\n", + "[0]\tvalidation_0-rmse:93.484\n", "Will train until validation_0-rmse hasn't improved in 40 rounds.\n", - "[1]\tvalidation_0-rmse:85.3672\n", - "[2]\tvalidation_0-rmse:80.6077\n", - "[3]\tvalidation_0-rmse:75.9867\n", - "[4]\tvalidation_0-rmse:72.15\n", - "[5]\tvalidation_0-rmse:68.4247\n", - "[6]\tvalidation_0-rmse:65.4166\n", - "[7]\tvalidation_0-rmse:62.7606\n", - "[8]\tvalidation_0-rmse:60.1438\n", - "[9]\tvalidation_0-rmse:57.9401\n", - "[10]\tvalidation_0-rmse:55.8747\n", - "[11]\tvalidation_0-rmse:53.957\n", - "[12]\tvalidation_0-rmse:52.2249\n", - "[13]\tvalidation_0-rmse:50.556\n", - "[14]\tvalidation_0-rmse:49.2282\n", - "[15]\tvalidation_0-rmse:47.8585\n", - "[16]\tvalidation_0-rmse:46.6933\n", - "[17]\tvalidation_0-rmse:45.5335\n", - "[18]\tvalidation_0-rmse:44.3206\n", - "[19]\tvalidation_0-rmse:43.2371\n", - "[20]\tvalidation_0-rmse:42.5117\n", - "[21]\tvalidation_0-rmse:41.6298\n", - "[22]\tvalidation_0-rmse:40.9242\n", - "[23]\tvalidation_0-rmse:40.1302\n", - "[24]\tvalidation_0-rmse:39.4707\n", - "[25]\tvalidation_0-rmse:38.8031\n", - "[26]\tvalidation_0-rmse:38.3108\n", - "[27]\tvalidation_0-rmse:37.689\n", - "[28]\tvalidation_0-rmse:37.1699\n", - "[29]\tvalidation_0-rmse:36.5853\n", - "[30]\tvalidation_0-rmse:36.3127\n", - "[31]\tvalidation_0-rmse:35.8365\n", - "[32]\tvalidation_0-rmse:35.4656\n", - "[33]\tvalidation_0-rmse:35.2841\n", - "[34]\tvalidation_0-rmse:35.0051\n", - "[35]\tvalidation_0-rmse:34.611\n", - "[36]\tvalidation_0-rmse:34.2154\n", - "[37]\tvalidation_0-rmse:34.1117\n", - "[38]\tvalidation_0-rmse:33.9263\n", - "[39]\tvalidation_0-rmse:33.6358\n", - "[40]\tvalidation_0-rmse:33.4676\n", - "[41]\tvalidation_0-rmse:33.3131\n", - "[42]\tvalidation_0-rmse:33.1974\n", - "[43]\tvalidation_0-rmse:32.9947\n", - "[44]\tvalidation_0-rmse:32.9613\n", - "[45]\tvalidation_0-rmse:32.8286\n", - "[46]\tvalidation_0-rmse:32.6034\n", - "[47]\tvalidation_0-rmse:32.4865\n", - "[48]\tvalidation_0-rmse:32.334\n", - "[49]\tvalidation_0-rmse:32.1416\n", - "mean_absolute_error=18.60\n", + "[1]\tvalidation_0-rmse:87.8587\n", + "[2]\tvalidation_0-rmse:82.3\n", + "[3]\tvalidation_0-rmse:77.6844\n", + "[4]\tvalidation_0-rmse:73.6202\n", + "[5]\tvalidation_0-rmse:70.051\n", + "[6]\tvalidation_0-rmse:66.3668\n", + "[7]\tvalidation_0-rmse:63.3949\n", + "[8]\tvalidation_0-rmse:60.7882\n", + "[9]\tvalidation_0-rmse:58.6315\n", + "[10]\tvalidation_0-rmse:56.6578\n", + "[11]\tvalidation_0-rmse:54.9536\n", + "[12]\tvalidation_0-rmse:53.4898\n", + "[13]\tvalidation_0-rmse:52.0987\n", + "[14]\tvalidation_0-rmse:50.3687\n", + "[15]\tvalidation_0-rmse:49.4893\n", + "[16]\tvalidation_0-rmse:47.7285\n", + "[17]\tvalidation_0-rmse:46.6625\n", + "[18]\tvalidation_0-rmse:45.5546\n", + "[19]\tvalidation_0-rmse:44.7106\n", + "[20]\tvalidation_0-rmse:43.926\n", + "[21]\tvalidation_0-rmse:43.1622\n", + "[22]\tvalidation_0-rmse:42.2623\n", + "[23]\tvalidation_0-rmse:41.4546\n", + "[24]\tvalidation_0-rmse:40.7816\n", + 
"[25]\tvalidation_0-rmse:39.9577\n", + "[26]\tvalidation_0-rmse:39.303\n", + "[27]\tvalidation_0-rmse:38.7521\n", + "[28]\tvalidation_0-rmse:38.1119\n", + "[29]\tvalidation_0-rmse:37.3337\n", + "[30]\tvalidation_0-rmse:37.2111\n", + "[31]\tvalidation_0-rmse:36.7978\n", + "[32]\tvalidation_0-rmse:36.0016\n", + "[33]\tvalidation_0-rmse:35.5342\n", + "[34]\tvalidation_0-rmse:35.3714\n", + "[35]\tvalidation_0-rmse:34.7494\n", + "[36]\tvalidation_0-rmse:34.5603\n", + "[37]\tvalidation_0-rmse:34.126\n", + "[38]\tvalidation_0-rmse:33.8422\n", + "[39]\tvalidation_0-rmse:33.6291\n", + "[40]\tvalidation_0-rmse:33.2366\n", + "[41]\tvalidation_0-rmse:32.8506\n", + "[42]\tvalidation_0-rmse:32.7166\n", + "[43]\tvalidation_0-rmse:32.6814\n", + "[44]\tvalidation_0-rmse:32.3616\n", + "[45]\tvalidation_0-rmse:32.1414\n", + "[46]\tvalidation_0-rmse:32.0641\n", + "[47]\tvalidation_0-rmse:31.6858\n", + "[48]\tvalidation_0-rmse:31.6368\n", + "[49]\tvalidation_0-rmse:31.3603\n", + "mean_absolute_error=22.90\n", "Model export success: mockup-model.dat\n", - "Best RMSE on eval: %.2f with %d rounds 32.141602 50\n" + "Best RMSE on eval: %.2f with %d rounds 31.360287 50\n" ] } ], "source": [ "pod_spec = builder.generate_pod_spec()\n", - "NAMESPACE = \"user1\"\n", - "train_deployer = job.job.Job(namespace=NAMESPACE, \n", - " cleanup=False,\n", + "train_deployer = job.job.Job(cleanup=False,\n", " pod_spec_mutators=[\n", " fairing.cloud.gcp.add_gcp_credentials_if_exists])\n", "\n", @@ -746,93 +1068,94 @@ "result = train_deployer.deploy(pod_spec)" ] }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "* You can use kubectl to inspect the job that fairing created" + ] + }, { "cell_type": "code", - "execution_count": 20, + "execution_count": 18, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "apiVersion: v1\r\n", - "items:\r\n", - "- apiVersion: batch/v1\r\n", - " kind: Job\r\n", - " metadata:\r\n", - " creationTimestamp: \"2019-06-12T20:21:53Z\"\r\n", - " generateName: fairing-job-\r\n", - " labels:\r\n", - " fairing-deployer: job\r\n", - " fairing-id: b7955e0a-8d4f-11e9-9207-96ec34699c76\r\n", - " name: fairing-job-t429t\r\n", - " namespace: user1\r\n", - " resourceVersion: \"7556018\"\r\n", - " selfLink: /apis/batch/v1/namespaces/user1/jobs/fairing-job-t429t\r\n", - " uid: b7b87f19-8d4f-11e9-b008-42010a8e01a5\r\n", - " spec:\r\n", - " backoffLimit: 0\r\n", - " completions: 1\r\n", - " parallelism: 1\r\n", - " selector:\r\n", - " matchLabels:\r\n", - " controller-uid: b7b87f19-8d4f-11e9-b008-42010a8e01a5\r\n", - " template:\r\n", - " metadata:\r\n", - " creationTimestamp: null\r\n", - " labels:\r\n", - " controller-uid: b7b87f19-8d4f-11e9-b008-42010a8e01a5\r\n", - " fairing-deployer: job\r\n", - " fairing-id: b7955e0a-8d4f-11e9-9207-96ec34699c76\r\n", - " job-name: fairing-job-t429t\r\n", - " name: fairing-deployer\r\n", - " spec:\r\n", - " containers:\r\n", - " - command:\r\n", - " - python\r\n", - " - /app/mockup-data-xgboost-build-train-deploy.py\r\n", - " - train\r\n", - " env:\r\n", - " - name: FAIRING_RUNTIME\r\n", - " value: \"1\"\r\n", - " - name: GOOGLE_APPLICATION_CREDENTIALS\r\n", - " value: /etc/secrets/user-gcp-sa.json\r\n", - " image: gcr.io/zahrakubeflowcodelab/fairing-job/fairing-job:6F63F28C\r\n", - " imagePullPolicy: IfNotPresent\r\n", - " name: fairing-job\r\n", - " resources: {}\r\n", - " securityContext:\r\n", - " runAsUser: 0\r\n", - " terminationMessagePath: /dev/termination-log\r\n", - " terminationMessagePolicy: File\r\n", - " 
volumeMounts:\r\n", - " - mountPath: /etc/secrets\r\n", - " name: user-gcp-sa\r\n", - " readOnly: true\r\n", - " workingDir: /app/\r\n", - " dnsPolicy: ClusterFirst\r\n", - " restartPolicy: Never\r\n", - " schedulerName: default-scheduler\r\n", - " securityContext: {}\r\n", - " terminationGracePeriodSeconds: 30\r\n", - " volumes:\r\n", - " - name: user-gcp-sa\r\n", - " secret:\r\n", - " defaultMode: 420\r\n", - " secretName: user-gcp-sa\r\n", - " status:\r\n", - " completionTime: \"2019-06-12T20:22:00Z\"\r\n", - " conditions:\r\n", - " - lastProbeTime: \"2019-06-12T20:22:00Z\"\r\n", - " lastTransitionTime: \"2019-06-12T20:22:00Z\"\r\n", - " status: \"True\"\r\n", - " type: Complete\r\n", - " startTime: \"2019-06-12T20:21:53Z\"\r\n", - " succeeded: 1\r\n", - "kind: List\r\n", - "metadata:\r\n", - " resourceVersion: \"\"\r\n", - " selfLink: \"\"\r\n" + "apiVersion: v1\n", + "items:\n", + "- apiVersion: batch/v1\n", + " kind: Job\n", + " metadata:\n", + " creationTimestamp: \"2019-08-18T00:33:14Z\"\n", + " generateName: fairing-job-\n", + " labels:\n", + " fairing-deployer: job\n", + " fairing-id: c423b296-c14f-11e9-8c24-dade65de3221\n", + " name: fairing-job-ftbfh\n", + " namespace: kubeflow-jlewi\n", + " resourceVersion: \"851927\"\n", + " selfLink: /apis/batch/v1/namespaces/kubeflow-jlewi/jobs/fairing-job-ftbfh\n", + " uid: c42a1de3-c14f-11e9-ab37-42010a8e00c9\n", + " spec:\n", + " backoffLimit: 0\n", + " completions: 1\n", + " parallelism: 1\n", + " selector:\n", + " matchLabels:\n", + " controller-uid: c42a1de3-c14f-11e9-ab37-42010a8e00c9\n", + " template:\n", + " metadata:\n", + " creationTimestamp: null\n", + " labels:\n", + " controller-uid: c42a1de3-c14f-11e9-ab37-42010a8e00c9\n", + " fairing-deployer: job\n", + " fairing-id: c423b296-c14f-11e9-8c24-dade65de3221\n", + " job-name: fairing-job-ftbfh\n", + " name: fairing-deployer\n", + " spec:\n", + " containers:\n", + " - command:\n", + " - python\n", + " - /app/build-train-deploy.py\n", + " - train\n", + " env:\n", + " - name: FAIRING_RUNTIME\n", + " value: \"1\"\n", + " - name: GOOGLE_APPLICATION_CREDENTIALS\n", + " value: /etc/secrets/user-gcp-sa.json\n", + " image: gcr.io/jlewi-dev/fairing-job/fairing-job:A84D11BF\n", + " imagePullPolicy: IfNotPresent\n", + " name: fairing-job\n", + " resources: {}\n", + " securityContext:\n", + " runAsUser: 0\n", + " terminationMessagePath: /dev/termination-log\n", + " terminationMessagePolicy: File\n", + " volumeMounts:\n", + " - mountPath: /etc/secrets\n", + " name: user-gcp-sa\n", + " readOnly: true\n", + " workingDir: /app/\n", + " dnsPolicy: ClusterFirst\n", + " restartPolicy: Never\n", + " schedulerName: default-scheduler\n", + " securityContext: {}\n", + " terminationGracePeriodSeconds: 30\n", + " volumes:\n", + " - name: user-gcp-sa\n", + " secret:\n", + " defaultMode: 420\n", + " secretName: user-gcp-sa\n", + " status:\n", + " active: 1\n", + " startTime: \"2019-08-18T00:33:14Z\"\n", + "kind: List\n", + "metadata:\n", + " resourceVersion: \"\"\n", + " selfLink: \"\"\n" ] } ], @@ -847,16 +1170,27 @@ "## Deploy the trained model to Kubeflow for predictions" ] }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "* Now that you have trained a model you can use fairing to deploy it on Kubernetes\n", + "* When you call deployer.deploy fairing will create a [Kubernetes Deployment](https://kubernetes.io/docs/concepts/workloads/controllers/deployment/) to serve your model\n", + "* Fairing uses the docker image you created earlier\n", + "* The docker image you created contains your 
code and [Seldon core](https://www.seldon.io/)\n", + "* Fairing uses Seldon to wrap your prediction code, ModelServe.predict, in a REST and gRPC server" + ] + }, { "cell_type": "code", - "execution_count": 21, + "execution_count": 19, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ - "INFO:root:Cluster endpoint: http://fairing-service-jjgxd.user1.svc.cluster.local\n" + "Cluster endpoint: http://fairing-service-cxwv5.kubeflow-jlewi.svc.cluster.local\n" ] } ], @@ -872,98 +1206,104 @@ "url = deployer.deploy(pod_spec)" ] }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "* You can use kubectl to inspect the deployment that fairing created" + ] + }, { "cell_type": "code", - "execution_count": 22, + "execution_count": 20, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "apiVersion: extensions/v1beta1\r\n", - "kind: Deployment\r\n", - "metadata:\r\n", - " annotations:\r\n", - " deployment.kubernetes.io/revision: \"1\"\r\n", - " creationTimestamp: \"2019-06-12T20:22:27Z\"\r\n", - " generateName: fairing-deployer-\r\n", - " generation: 1\r\n", - " labels:\r\n", - " app: mockup\r\n", - " fairing-deployer: serving\r\n", - " fairing-id: cbc0e610-8d4f-11e9-9207-96ec34699c76\r\n", - " name: fairing-deployer-cltbb\r\n", - " namespace: user1\r\n", - " resourceVersion: \"7556174\"\r\n", - " selfLink: /apis/extensions/v1beta1/namespaces/user1/deployments/fairing-deployer-cltbb\r\n", - " uid: cbc54e8f-8d4f-11e9-b008-42010a8e01a5\r\n", - "spec:\r\n", - " progressDeadlineSeconds: 600\r\n", - " replicas: 1\r\n", - " revisionHistoryLimit: 10\r\n", - " selector:\r\n", - " matchLabels:\r\n", - " app: mockup\r\n", - " fairing-deployer: serving\r\n", - " fairing-id: cbc0e610-8d4f-11e9-9207-96ec34699c76\r\n", - " strategy:\r\n", - " rollingUpdate:\r\n", - " maxSurge: 25%\r\n", - " maxUnavailable: 25%\r\n", - " type: RollingUpdate\r\n", - " template:\r\n", - " metadata:\r\n", - " creationTimestamp: null\r\n", - " labels:\r\n", - " app: mockup\r\n", - " fairing-deployer: serving\r\n", - " fairing-id: cbc0e610-8d4f-11e9-9207-96ec34699c76\r\n", - " name: fairing-deployer\r\n", - " spec:\r\n", - " containers:\r\n", - " - command:\r\n", - " - seldon-core-microservice\r\n", - " - mockup-data-xgboost-build-train-deploy.ModelServe\r\n", - " - REST\r\n", - " - --service-type=MODEL\r\n", - " - --persistence=0\r\n", - " env:\r\n", - " - name: FAIRING_RUNTIME\r\n", - " value: \"1\"\r\n", - " image: gcr.io/zahrakubeflowcodelab/fairing-job/fairing-job:6F63F28C\r\n", - " imagePullPolicy: IfNotPresent\r\n", - " name: model\r\n", - " resources: {}\r\n", - " securityContext:\r\n", - " runAsUser: 0\r\n", - " terminationMessagePath: /dev/termination-log\r\n", - " terminationMessagePolicy: File\r\n", - " workingDir: /app/\r\n", - " dnsPolicy: ClusterFirst\r\n", - " restartPolicy: Always\r\n", - " schedulerName: default-scheduler\r\n", - " securityContext: {}\r\n", - " terminationGracePeriodSeconds: 30\r\n", - "status:\r\n", - " availableReplicas: 1\r\n", - " conditions:\r\n", - " - lastTransitionTime: \"2019-06-12T20:22:29Z\"\r\n", - " lastUpdateTime: \"2019-06-12T20:22:29Z\"\r\n", - " message: Deployment has minimum availability.\r\n", - " reason: MinimumReplicasAvailable\r\n", - " status: \"True\"\r\n", - " type: Available\r\n", - " - lastTransitionTime: \"2019-06-12T20:22:27Z\"\r\n", - " lastUpdateTime: \"2019-06-12T20:22:29Z\"\r\n", - " message: ReplicaSet \"fairing-deployer-cltbb-864d4d6f8f\" has successfully progressed.\r\n", - " reason: 
NewReplicaSetAvailable\r\n", - " status: \"True\"\r\n", - " type: Progressing\r\n", - " observedGeneration: 1\r\n", - " readyReplicas: 1\r\n", - " replicas: 1\r\n", - " updatedReplicas: 1\r\n" + "apiVersion: extensions/v1beta1\n", + "kind: Deployment\n", + "metadata:\n", + " annotations:\n", + " deployment.kubernetes.io/revision: \"1\"\n", + " creationTimestamp: \"2019-08-18T00:33:33Z\"\n", + " generateName: fairing-deployer-\n", + " generation: 1\n", + " labels:\n", + " app: mockup\n", + " fairing-deployer: serving\n", + " fairing-id: cf47840e-c14f-11e9-8c24-dade65de3221\n", + " name: fairing-deployer-lcwvh\n", + " namespace: kubeflow-jlewi\n", + " resourceVersion: \"852066\"\n", + " selfLink: /apis/extensions/v1beta1/namespaces/kubeflow-jlewi/deployments/fairing-deployer-lcwvh\n", + " uid: cf49d3a5-c14f-11e9-ab37-42010a8e00c9\n", + "spec:\n", + " progressDeadlineSeconds: 600\n", + " replicas: 1\n", + " revisionHistoryLimit: 10\n", + " selector:\n", + " matchLabels:\n", + " app: mockup\n", + " fairing-deployer: serving\n", + " fairing-id: cf47840e-c14f-11e9-8c24-dade65de3221\n", + " strategy:\n", + " rollingUpdate:\n", + " maxSurge: 25%\n", + " maxUnavailable: 25%\n", + " type: RollingUpdate\n", + " template:\n", + " metadata:\n", + " creationTimestamp: null\n", + " labels:\n", + " app: mockup\n", + " fairing-deployer: serving\n", + " fairing-id: cf47840e-c14f-11e9-8c24-dade65de3221\n", + " name: fairing-deployer\n", + " spec:\n", + " containers:\n", + " - command:\n", + " - seldon-core-microservice\n", + " - build-train-deploy.ModelServe\n", + " - REST\n", + " - --service-type=MODEL\n", + " - --persistence=0\n", + " env:\n", + " - name: FAIRING_RUNTIME\n", + " value: \"1\"\n", + " image: gcr.io/jlewi-dev/fairing-job/fairing-job:A84D11BF\n", + " imagePullPolicy: IfNotPresent\n", + " name: model\n", + " resources: {}\n", + " securityContext:\n", + " runAsUser: 0\n", + " terminationMessagePath: /dev/termination-log\n", + " terminationMessagePolicy: File\n", + " workingDir: /app/\n", + " dnsPolicy: ClusterFirst\n", + " restartPolicy: Always\n", + " schedulerName: default-scheduler\n", + " securityContext: {}\n", + " terminationGracePeriodSeconds: 30\n", + "status:\n", + " conditions:\n", + " - lastTransitionTime: \"2019-08-18T00:33:33Z\"\n", + " lastUpdateTime: \"2019-08-18T00:33:33Z\"\n", + " message: Deployment does not have minimum availability.\n", + " reason: MinimumReplicasUnavailable\n", + " status: \"False\"\n", + " type: Available\n", + " - lastTransitionTime: \"2019-08-18T00:33:33Z\"\n", + " lastUpdateTime: \"2019-08-18T00:33:33Z\"\n", + " message: ReplicaSet \"fairing-deployer-lcwvh-64bbfd968c\" is progressing.\n", + " reason: ReplicaSetUpdated\n", + " status: \"True\"\n", + " type: Progressing\n", + " observedGeneration: 1\n", + " replicas: 1\n", + " unavailableReplicas: 1\n", + " updatedReplicas: 1\n" ] } ], @@ -975,33 +1315,33 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "## Call the prediction endpoint\n", + "## Send an inference request to the prediction server\n", "\n", - "Create a test dataset, then call the endpoint on Kubeflow for predictions." 
+ "* Now that you have deployed the model into your Kubernetes cluster, you can send a REST request to \n", + " preform inference\n", + "* The code below reads some data, sends, a prediction request and then prints out the response" ] }, { "cell_type": "code", - "execution_count": 23, + "execution_count": 21, "metadata": {}, "outputs": [], "source": [ - "(train_X, train_y), (test_X, test_y) =read_synthetic_input()\n" + "(train_X, train_y), (test_X, test_y) = read_synthetic_input()\n" ] }, { "cell_type": "code", - "execution_count": 24, + "execution_count": 22, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "(b'\\n500 Inter'\n", - " b'nal Server Error\\n

Internal Server Error

\\n

The server en'\n", - " b'countered an internal error and was unable to complete your request. Either '\n", - " b'the server is overloaded or there is an error in the application.

\\n')\n" + "(b'{\"data\":{\"names\":[\"t:0\",\"t:1\"],\"tensor\":{\"shape\":[1,2],\"values\":[-27.2208251'\n", + " b'953125,-36.85965347290039]}},\"meta\":{}}\\n')\n" ] } ], @@ -1017,12 +1357,13 @@ "source": [ "## Clean up the prediction endpoint\n", "\n", - "Delete the prediction endpoint created by this notebook." + "* You can use kubectl to delete the Kubernetes resources for your model\n", + "* If you want to delete the resources uncomment the following lines and run them" ] }, { "cell_type": "code", - "execution_count": 33, + "execution_count": 23, "metadata": {}, "outputs": [], "source": [ @@ -1034,16 +1375,421 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "## Build a simple 1 step pipeline" + "## Track Models and Artifacts\n", + "\n", + "* Using Kubeflow's metadata server you can track models and artifacts\n", + "* The ModelServe code was instrumented to log executions and outputs\n", + "* You can access Kubeflow's metadata UI by selecting **Artifact Store** from the central dashboard\n", + " * See [here](https://www.kubeflow.org/docs/other-guides/accessing-uis/) for instructions on connecting to Kubeflow's UIs\n", + "* You can also use the python SDK to read and write entries\n", + "* This [notebook](https://github.com/kubeflow/metadata/blob/master/sdk/python/demo.ipynb) illustrates a bunch of metadata functionality" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Create a workspace\n", + "\n", + "* Kubeflow metadata uses workspaces as a logical grouping for artifacts, executions, and datasets that belong together\n", + "* Earlier in the notebook we defined the function `create_workspace` to create a workspace for this example\n", + "* You can use that function to return a workspace object and then call list to see all the artifacts in that workspace" ] }, { "cell_type": "code", - "execution_count": 25, + "execution_count": 24, "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "[{'id': '4',\n", + " 'workspace': 'xgboost-synthetic',\n", + " 'run': 'xgboost-synthetic-faring-run2019-08-16T17:37:21.338937',\n", + " 'create_time': '2019-08-16T17:37:21.569390Z',\n", + " 'description': 'housing price prediction model using synthetic data',\n", + " 'model_type': 'linear_regression',\n", + " 'name': 'housing-price-model',\n", + " 'owner': 'someone@kubeflow.org',\n", + " 'version': '2019-08-16T17:37:21.569356',\n", + " 'uri': 'mockup-model.dat',\n", + " 'training_framework': {'name': 'xgboost', 'version': '0.9.0'},\n", + " 'hyperparameters': {'learning_rate': 0.1, 'n_estimators': 50},\n", + " 'labels': None},\n", + " {'id': '7',\n", + " 'workspace': 'xgboost-synthetic',\n", + " 'run': 'xgboost-synthetic-faring-run2019-08-16T17:37:40.551731',\n", + " 'create_time': '2019-08-16T17:37:40.762260Z',\n", + " 'description': 'housing price prediction model using synthetic data',\n", + " 'model_type': 'linear_regression',\n", + " 'name': 'housing-price-model',\n", + " 'owner': 'someone@kubeflow.org',\n", + " 'version': '2019-08-16T17:37:40.762231',\n", + " 'uri': 'mockup-model.dat',\n", + " 'training_framework': {'name': 'xgboost', 'version': '0.9.0'},\n", + " 'hyperparameters': {'learning_rate': 0.1, 'n_estimators': 50},\n", + " 'labels': None},\n", + " {'id': '10',\n", + " 'workspace': 'xgboost-synthetic',\n", + " 'run': 'xgboost-synthetic-faring-run2019-08-16T17:37:42.075423',\n", + " 'create_time': '2019-08-16T17:37:42.288795Z',\n", + " 'description': 'housing price prediction model using synthetic data',\n", + " 
'model_type': 'linear_regression',\n", + " 'name': 'housing-price-model',\n", + " 'owner': 'someone@kubeflow.org',\n", + " 'version': '2019-08-16T17:37:42.288767',\n", + " 'uri': 'mockup-model.dat',\n", + " 'training_framework': {'name': 'xgboost', 'version': '0.9.0'},\n", + " 'hyperparameters': {'learning_rate': 0.1, 'n_estimators': 50},\n", + " 'labels': None},\n", + " {'id': '13',\n", + " 'workspace': 'xgboost-synthetic',\n", + " 'run': 'xgboost-synthetic-faring-run2019-08-16T17:37:43.596662',\n", + " 'create_time': '2019-08-16T17:37:43.797385Z',\n", + " 'description': 'housing price prediction model using synthetic data',\n", + " 'model_type': 'linear_regression',\n", + " 'name': 'housing-price-model',\n", + " 'owner': 'someone@kubeflow.org',\n", + " 'version': '2019-08-16T17:37:43.797357',\n", + " 'uri': 'mockup-model.dat',\n", + " 'training_framework': {'name': 'xgboost', 'version': '0.9.0'},\n", + " 'hyperparameters': {'learning_rate': 0.1, 'n_estimators': 50},\n", + " 'labels': None},\n", + " {'id': '16',\n", + " 'workspace': 'xgboost-synthetic',\n", + " 'run': 'xgboost-synthetic-faring-run2019-08-16T17:37:45.697220',\n", + " 'create_time': '2019-08-16T17:37:45.902046Z',\n", + " 'description': 'housing price prediction model using synthetic data',\n", + " 'model_type': 'linear_regression',\n", + " 'name': 'housing-price-model',\n", + " 'owner': 'someone@kubeflow.org',\n", + " 'version': '2019-08-16T17:37:45.901973',\n", + " 'uri': 'mockup-model.dat',\n", + " 'training_framework': {'name': 'xgboost', 'version': '0.9.0'},\n", + " 'hyperparameters': {'learning_rate': 0.1, 'n_estimators': 50},\n", + " 'labels': None},\n", + " {'id': '19',\n", + " 'workspace': 'xgboost-synthetic',\n", + " 'run': 'xgboost-synthetic-faring-run2019-08-16T22:10:53.508578',\n", + " 'create_time': '2019-08-16T22:10:53.761012Z',\n", + " 'description': 'housing price prediction model using synthetic data',\n", + " 'model_type': 'linear_regression',\n", + " 'name': 'housing-price-model',\n", + " 'owner': 'someone@kubeflow.org',\n", + " 'version': '2019-08-16T22:10:53.760970',\n", + " 'uri': 'mockup-model.dat',\n", + " 'training_framework': {'name': 'xgboost', 'version': '0.9.0'},\n", + " 'hyperparameters': {'learning_rate': 0.1, 'n_estimators': 50},\n", + " 'labels': None},\n", + " {'id': '22',\n", + " 'workspace': 'xgboost-synthetic',\n", + " 'run': 'xgboost-synthetic-faring-run2019-08-16T22:10:56.902050',\n", + " 'create_time': '2019-08-16T22:10:57.126435Z',\n", + " 'description': 'housing price prediction model using synthetic data',\n", + " 'model_type': 'linear_regression',\n", + " 'name': 'housing-price-model',\n", + " 'owner': 'someone@kubeflow.org',\n", + " 'version': '2019-08-16T22:10:57.126410',\n", + " 'uri': 'mockup-model.dat',\n", + " 'training_framework': {'name': 'xgboost', 'version': '0.9.0'},\n", + " 'hyperparameters': {'learning_rate': 0.1, 'n_estimators': 50},\n", + " 'labels': None},\n", + " {'id': '25',\n", + " 'workspace': 'xgboost-synthetic',\n", + " 'run': 'xgboost-synthetic-faring-run2019-08-16T22:34:04.140210',\n", + " 'create_time': '2019-08-16T22:34:04.371380Z',\n", + " 'description': 'housing price prediction model using synthetic data',\n", + " 'model_type': 'linear_regression',\n", + " 'name': 'housing-price-model',\n", + " 'owner': 'someone@kubeflow.org',\n", + " 'version': '2019-08-16T22:34:04.371342',\n", + " 'uri': 'mockup-model.dat',\n", + " 'training_framework': {'name': 'xgboost', 'version': '0.9.0'},\n", + " 'hyperparameters': {'learning_rate': 0.1, 'n_estimators': 50},\n", 
+ " 'labels': None},\n", + " {'id': '28',\n", + " 'workspace': 'xgboost-synthetic',\n", + " 'run': 'xgboost-synthetic-faring-run2019-08-16T22:35:42.154942',\n", + " 'create_time': '2019-08-16T22:35:42.400014Z',\n", + " 'description': 'housing price prediction model using synthetic data',\n", + " 'model_type': 'linear_regression',\n", + " 'name': 'housing-price-model',\n", + " 'owner': 'someone@kubeflow.org',\n", + " 'version': '2019-08-16T22:35:42.399985',\n", + " 'uri': 'mockup-model.dat',\n", + " 'training_framework': {'name': 'xgboost', 'version': '0.9.0'},\n", + " 'hyperparameters': {'learning_rate': 0.1, 'n_estimators': 50},\n", + " 'labels': None},\n", + " {'id': '31',\n", + " 'workspace': 'xgboost-synthetic',\n", + " 'run': 'xgboost-synthetic-faring-run2019-08-16T22:50:00.175507',\n", + " 'create_time': '2019-08-16T22:50:00.398220Z',\n", + " 'description': 'housing price prediction model using synthetic data',\n", + " 'model_type': 'linear_regression',\n", + " 'name': 'housing-price-model',\n", + " 'owner': 'someone@kubeflow.org',\n", + " 'version': '2019-08-16T22:50:00.398189',\n", + " 'uri': 'mockup-model.dat',\n", + " 'training_framework': {'name': 'xgboost', 'version': '0.9.0'},\n", + " 'hyperparameters': {'learning_rate': 0.1, 'n_estimators': 50},\n", + " 'labels': None},\n", + " {'id': '34',\n", + " 'workspace': 'xgboost-synthetic',\n", + " 'run': 'xgboost-synthetic-faring-run2019-08-16T22:50:09.199737',\n", + " 'create_time': '2019-08-16T22:50:09.409746Z',\n", + " 'description': 'housing price prediction model using synthetic data',\n", + " 'model_type': 'linear_regression',\n", + " 'name': 'housing-price-model',\n", + " 'owner': 'someone@kubeflow.org',\n", + " 'version': '2019-08-16T22:50:09.409716',\n", + " 'uri': 'mockup-model.dat',\n", + " 'training_framework': {'name': 'xgboost', 'version': '0.9.0'},\n", + " 'hyperparameters': {'learning_rate': 0.1, 'n_estimators': 50},\n", + " 'labels': None},\n", + " {'id': '37',\n", + " 'workspace': 'xgboost-synthetic',\n", + " 'run': 'xgboost-synthetic-faring-run2019-08-16T22:55:11.185839',\n", + " 'create_time': '2019-08-16T22:55:11.426015Z',\n", + " 'description': 'housing price prediction model using synthetic data',\n", + " 'model_type': 'linear_regression',\n", + " 'name': 'housing-price-model',\n", + " 'owner': 'someone@kubeflow.org',\n", + " 'version': '2019-08-16T22:55:11.425985',\n", + " 'uri': 'mockup-model.dat',\n", + " 'training_framework': {'name': 'xgboost', 'version': '0.9.0'},\n", + " 'hyperparameters': {'learning_rate': 0.1, 'n_estimators': 50},\n", + " 'labels': None},\n", + " {'id': '40',\n", + " 'workspace': 'xgboost-synthetic',\n", + " 'run': 'xgboost-synthetic-faring-run2019-08-16T22:56:58.371363',\n", + " 'create_time': '2019-08-16T22:56:58.545544Z',\n", + " 'description': 'housing price prediction model using synthetic data',\n", + " 'model_type': 'linear_regression',\n", + " 'name': 'housing-price-model',\n", + " 'owner': 'someone@kubeflow.org',\n", + " 'version': '2019-08-16T22:56:58.545506',\n", + " 'uri': 'mockup-model.dat',\n", + " 'training_framework': {'name': 'xgboost', 'version': '0.9.0'},\n", + " 'hyperparameters': {'learning_rate': 0.1, 'n_estimators': 50},\n", + " 'labels': None},\n", + " {'id': '43',\n", + " 'workspace': 'xgboost-synthetic',\n", + " 'run': 'xgboost-synthetic-faring-run2019-08-16T23:35:08.335115',\n", + " 'create_time': '2019-08-16T23:35:08.520758Z',\n", + " 'description': 'housing price prediction model using synthetic data',\n", + " 'model_type': 'linear_regression',\n", + " 
'name': 'housing-price-model',\n", + " 'owner': 'someone@kubeflow.org',\n", + " 'version': '2019-08-16T23:35:08.520583',\n", + " 'uri': 'mockup-model.dat',\n", + " 'training_framework': {'name': 'xgboost', 'version': '0.9.0'},\n", + " 'hyperparameters': {'learning_rate': 0.1, 'n_estimators': 50},\n", + " 'labels': None},\n", + " {'id': '46',\n", + " 'workspace': 'xgboost-synthetic',\n", + " 'run': 'xgboost-synthetic-faring-run2019-08-16T23:44:46.889414',\n", + " 'create_time': '2019-08-16T23:44:47.131904Z',\n", + " 'description': 'housing price prediction model using synthetic data',\n", + " 'model_type': 'linear_regression',\n", + " 'name': 'housing-price-model',\n", + " 'owner': 'someone@kubeflow.org',\n", + " 'version': '2019-08-16T23:44:47.131868',\n", + " 'uri': 'mockup-model.dat',\n", + " 'training_framework': {'name': 'xgboost', 'version': '0.9.0'},\n", + " 'hyperparameters': {'learning_rate': 0.1, 'n_estimators': 50},\n", + " 'labels': None},\n", + " {'id': '49',\n", + " 'workspace': 'xgboost-synthetic',\n", + " 'run': 'xgboost-synthetic-faring-run2019-08-16T23:51:34.813950',\n", + " 'create_time': '2019-08-16T23:51:35.084604Z',\n", + " 'description': 'housing price prediction model using synthetic data',\n", + " 'model_type': 'linear_regression',\n", + " 'name': 'housing-price-model',\n", + " 'owner': 'someone@kubeflow.org',\n", + " 'version': '2019-08-16T23:51:35.084556',\n", + " 'uri': 'mockup-model.dat',\n", + " 'training_framework': {'name': 'xgboost', 'version': '0.9.0'},\n", + " 'hyperparameters': {'learning_rate': 0.1, 'n_estimators': 50},\n", + " 'labels': None},\n", + " {'id': '52',\n", + " 'workspace': 'xgboost-synthetic',\n", + " 'run': 'xgboost-synthetic-faring-run2019-08-16T23:57:01.558106',\n", + " 'create_time': '2019-08-16T23:57:01.750162Z',\n", + " 'description': 'housing price prediction model using synthetic data',\n", + " 'model_type': 'linear_regression',\n", + " 'name': 'housing-price-model',\n", + " 'owner': 'someone@kubeflow.org',\n", + " 'version': '2019-08-16T23:57:01.750132',\n", + " 'uri': 'mockup-model.dat',\n", + " 'training_framework': {'name': 'xgboost', 'version': '0.9.0'},\n", + " 'hyperparameters': {'learning_rate': 0.1, 'n_estimators': 50},\n", + " 'labels': None},\n", + " {'id': '55',\n", + " 'workspace': 'xgboost-synthetic',\n", + " 'run': 'xgboost-synthetic-faring-run2019-08-17T00:02:40.138471',\n", + " 'create_time': '2019-08-17T00:02:40.409795Z',\n", + " 'description': 'housing price prediction model using synthetic data',\n", + " 'model_type': 'linear_regression',\n", + " 'name': 'housing-price-model',\n", + " 'owner': 'someone@kubeflow.org',\n", + " 'version': '2019-08-17T00:02:40.409763',\n", + " 'uri': 'mockup-model.dat',\n", + " 'training_framework': {'name': 'xgboost', 'version': '0.9.0'},\n", + " 'hyperparameters': {'learning_rate': 0.1, 'n_estimators': 50},\n", + " 'labels': None},\n", + " {'id': '58',\n", + " 'workspace': 'xgboost-synthetic',\n", + " 'run': 'xgboost-synthetic-faring-run2019-08-17T00:04:02.248451',\n", + " 'create_time': '2019-08-17T00:04:02.407539Z',\n", + " 'description': 'housing price prediction model using synthetic data',\n", + " 'model_type': 'linear_regression',\n", + " 'name': 'housing-price-model',\n", + " 'owner': 'someone@kubeflow.org',\n", + " 'version': '2019-08-17T00:04:02.407509',\n", + " 'uri': 'mockup-model.dat',\n", + " 'training_framework': {'name': 'xgboost', 'version': '0.9.0'},\n", + " 'hyperparameters': {'learning_rate': 0.1, 'n_estimators': 50},\n", + " 'labels': None},\n", + " {'id': 
'61',\n", + " 'workspace': 'xgboost-synthetic',\n", + " 'run': 'xgboost-synthetic-faring-run2019-08-17T01:13:43.418039',\n", + " 'create_time': '2019-08-17T01:13:43.676249Z',\n", + " 'description': 'housing price prediction model using synthetic data',\n", + " 'model_type': 'linear_regression',\n", + " 'name': 'housing-price-model',\n", + " 'owner': 'someone@kubeflow.org',\n", + " 'version': '2019-08-17T01:13:43.676202',\n", + " 'uri': 'mockup-model.dat',\n", + " 'training_framework': {'name': 'xgboost', 'version': '0.9.0'},\n", + " 'hyperparameters': {'learning_rate': 0.1, 'n_estimators': 50},\n", + " 'labels': None},\n", + " {'id': '64',\n", + " 'workspace': 'xgboost-synthetic',\n", + " 'run': 'xgboost-synthetic-faring-run2019-08-17T01:48:09.879955',\n", + " 'create_time': '2019-08-17T01:48:10.090521Z',\n", + " 'description': 'housing price prediction model using synthetic data',\n", + " 'model_type': 'linear_regression',\n", + " 'name': 'housing-price-model',\n", + " 'owner': 'someone@kubeflow.org',\n", + " 'version': '2019-08-17T01:48:10.090478',\n", + " 'uri': 'mockup-model.dat',\n", + " 'training_framework': {'name': 'xgboost', 'version': '0.9.0'},\n", + " 'hyperparameters': {'learning_rate': 0.1, 'n_estimators': 50},\n", + " 'labels': None},\n", + " {'id': '67',\n", + " 'workspace': 'xgboost-synthetic',\n", + " 'run': 'xgboost-synthetic-faring-run2019-08-17T20:14:31.424733',\n", + " 'create_time': '2019-08-17T20:14:31.624324Z',\n", + " 'description': 'housing price prediction model using synthetic data',\n", + " 'model_type': 'linear_regression',\n", + " 'name': 'housing-price-model',\n", + " 'owner': 'someone@kubeflow.org',\n", + " 'version': '2019-08-17T20:14:31.624290',\n", + " 'uri': 'mockup-model.dat',\n", + " 'training_framework': {'name': 'xgboost', 'version': '0.9.0'},\n", + " 'hyperparameters': {'learning_rate': 0.1, 'n_estimators': 50},\n", + " 'labels': None},\n", + " {'id': '70',\n", + " 'workspace': 'xgboost-synthetic',\n", + " 'run': 'xgboost-synthetic-faring-run2019-08-17T21:31:07.792382',\n", + " 'create_time': '2019-08-17T21:31:07.979491Z',\n", + " 'description': 'housing price prediction model using synthetic data',\n", + " 'model_type': 'linear_regression',\n", + " 'name': 'housing-price-model',\n", + " 'owner': 'someone@kubeflow.org',\n", + " 'version': '2019-08-17T21:31:07.979460',\n", + " 'uri': 'mockup-model.dat',\n", + " 'training_framework': {'name': 'xgboost', 'version': '0.9.0'},\n", + " 'hyperparameters': {'learning_rate': 0.1, 'n_estimators': 50},\n", + " 'labels': None},\n", + " {'id': '73',\n", + " 'workspace': 'xgboost-synthetic',\n", + " 'run': 'xgboost-synthetic-faring-run2019-08-17T21:32:19.864465',\n", + " 'create_time': '2019-08-17T21:32:20.051897Z',\n", + " 'description': 'housing price prediction model using synthetic data',\n", + " 'model_type': 'linear_regression',\n", + " 'name': 'housing-price-model',\n", + " 'owner': 'someone@kubeflow.org',\n", + " 'version': '2019-08-17T21:32:20.051859',\n", + " 'uri': 'mockup-model.dat',\n", + " 'training_framework': {'name': 'xgboost', 'version': '0.9.0'},\n", + " 'hyperparameters': {'learning_rate': 0.1, 'n_estimators': 50},\n", + " 'labels': None},\n", + " {'id': '76',\n", + " 'workspace': 'xgboost-synthetic',\n", + " 'run': 'xgboost-synthetic-faring-run2019-08-17T21:34:13.957359',\n", + " 'create_time': '2019-08-17T21:34:14.154483Z',\n", + " 'description': 'housing price prediction model using synthetic data',\n", + " 'model_type': 'linear_regression',\n", + " 'name': 'housing-price-model',\n", + " 
'owner': 'someone@kubeflow.org',\n", + "  'version': '2019-08-17T21:34:14.154453',\n", + "  'uri': 'mockup-model.dat',\n", + "  'training_framework': {'name': 'xgboost', 'version': '0.9.0'},\n", + "  'hyperparameters': {'learning_rate': 0.1, 'n_estimators': 50},\n", + "  'labels': None},\n", + " {'id': '79',\n", + "  'workspace': 'xgboost-synthetic',\n", + "  'run': 'xgboost-synthetic-faring-run2019-08-17T21:34:40.198663',\n", + "  'create_time': '2019-08-17T21:34:40.386833Z',\n", + "  'description': 'housing price prediction model using synthetic data',\n", + "  'model_type': 'linear_regression',\n", + "  'name': 'housing-price-model',\n", + "  'owner': 'someone@kubeflow.org',\n", + "  'version': '2019-08-17T21:34:40.386797',\n", + "  'uri': 'mockup-model.dat',\n", + "  'training_framework': {'name': 'xgboost', 'version': '0.9.0'},\n", + "  'hyperparameters': {'learning_rate': 0.1, 'n_estimators': 50},\n", + "  'labels': None},\n", + " {'id': '82',\n", + "  'workspace': 'xgboost-synthetic',\n", + "  'run': 'xgboost-synthetic-faring-run2019-08-18T00:32:40.451885',\n", + "  'create_time': '2019-08-18T00:32:40.678094Z',\n", + "  'description': 'housing price prediction model using synthetic data',\n", + "  'model_type': 'linear_regression',\n", + "  'name': 'housing-price-model',\n", + "  'owner': 'someone@kubeflow.org',\n", + "  'version': '2019-08-18T00:32:40.678065',\n", + "  'uri': 'mockup-model.dat',\n", + "  'training_framework': {'name': 'xgboost', 'version': '0.9.0'},\n", + "  'hyperparameters': {'learning_rate': 0.1, 'n_estimators': 50},\n", + "  'labels': None},\n", + " {'id': '85',\n", + "  'workspace': 'xgboost-synthetic',\n", + "  'run': 'xgboost-synthetic-faring-run2019-08-18T00:33:27.326993',\n", + "  'create_time': '2019-08-18T00:33:27.509801Z',\n", + "  'description': 'housing price prediction model using synthetic data',\n", + "  'model_type': 'linear_regression',\n", + "  'name': 'housing-price-model',\n", + "  'owner': 'someone@kubeflow.org',\n", + "  'version': '2019-08-18T00:33:27.509766',\n", + "  'uri': 'mockup-model.dat',\n", + "  'training_framework': {'name': 'xgboost', 'version': '0.9.0'},\n", + "  'hyperparameters': {'learning_rate': 0.1, 'n_estimators': 50},\n", + "  'labels': None}]" ] }, "execution_count": 24, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "EXPERIMENT_NAME = 'MockupModel'" + "ws = create_workspace()\n", + "ws.list()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Create a pipeline to train your model\n", + "\n", + "* [Kubeflow pipelines](https://www.kubeflow.org/docs/pipelines/) makes it easy to define complex workflows to build and deploy models\n", + "* Below you will define and run a simple one-step pipeline to train your model\n", + "* Kubeflow pipelines uses experiments to group different runs of a pipeline together\n", + "* So you start by defining a name for your experiment" ] }, { @@ -1051,12 +1797,21 @@ "metadata": {}, "source": [ "#### Define the pipeline\n", - "Pipeline function has to be decorated with the `@dsl.pipeline` decorator" + "\n", + "* To create a pipeline you create a function and decorate it with the `@dsl.pipeline` decorator\n", + "  * You use the decorator to give the pipeline a name and description\n", + "  \n", + "* Inside the function, each step is defined by a ContainerOp that specifies\n", + "  a container to invoke\n", + "  \n", + "* You will use the container image that you built earlier using Fairing\n", + "* Since the Fairing preprocessor added a main function using [python-fire](https://github.com/google/python-fire), a step in your pipeline can invoke any function in the ModelServe class just by setting the command for the container op\n", + "* See the pipelines [SDK reference](https://kubeflow-pipelines.readthedocs.io/en/latest/) for more information (a minimal sketch of this whole flow also appears after the diff below)" ] }, { "cell_type": "code", - "execution_count": 26, + "execution_count": 25, "metadata": {}, "outputs": [], "source": [ @@ -1084,9 +1839,16 @@ "#### Compile the pipeline" ] }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "* Pipelines need to be compiled" + ] + }, { "cell_type": "code", - "execution_count": 27, + "execution_count": 26, "metadata": {}, "outputs": [], "source": [ @@ -1102,22 +1864,24 @@ "#### Submit the pipeline for execution" ] }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "* Pipelines groups runs using experiments\n", + "* So before you submit a pipeline you need to create an experiment or pick an existing experiment\n", + "* Once you have compiled a pipeline, you can use the pipelines SDK to submit that pipeline\n" + ] + }, { "cell_type": "code", - "execution_count": 28, + "execution_count": 27, "metadata": {}, "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "INFO:root:Creating experiment MockupModel.\n" - ] - }, { "data": { "text/html": [ - "Experiment link here" + "Experiment link here" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ - "Run link here" + "Run link here" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ + "EXPERIMENT_NAME = 'MockupModel'\n", + "\n", "#Specify pipeline argument values\n", "arguments = {}\n", "\n", "\n", "#vvvvvvvvv This link leads to the run information page. (Note: There is a bug in JupyterLab that modifies the URL and makes the link stop working)" ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] } ], "metadata": { diff --git a/xgboost_synthetic/images/addgcpsecret.png b/xgboost_synthetic/images/addgcpsecret.png new file mode 100644 index 00000000..efc6bc8d Binary files /dev/null and b/xgboost_synthetic/images/addgcpsecret.png differ diff --git a/xgboost_synthetic/requirements.txt b/xgboost_synthetic/requirements.txt index 453a6c98..13341a39 100644 --- a/xgboost_synthetic/requirements.txt +++ b/xgboost_synthetic/requirements.txt @@ -3,6 +3,7 @@ fire gitpython google-cloud-storage joblib +kfmd numpy pandas retrying
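The diff elides the body of the pipeline cell, so for orientation here is a minimal, hedged sketch of the flow the markdown above describes: a `@dsl.pipeline`-decorated function whose single step is a `ContainerOp` that runs the Fairing-built image, followed by compilation and submission. It targets the KFP SDK of this era (0.1.x); the pipeline name, run name, and image tag are illustrative placeholders rather than the notebook's exact values (in the notebook the image comes from `builder.image_tag`, and the experiment name and arguments come from the `EXPERIMENT_NAME` and `arguments` cells).

```python
# Hedged sketch, not the notebook's exact cell: a one-step pipeline that
# reuses the Fairing-built image. Placeholder values are marked below.
import kfp
import kfp.dsl as dsl
import kfp.compiler as compiler

@dsl.pipeline(
    name='train-pipeline',  # illustrative name
    description='One-step pipeline that trains the synthetic-data XGBoost model.'
)
def train_pipeline():
    # One step == one ContainerOp. Because the Fairing preprocessor wired the
    # image's entry point through python-fire, appending "train" to the command
    # invokes ModelServe.train inside the container.
    dsl.ContainerOp(
        name='train',
        image='gcr.io/<project>/fairing-job/fairing-job:<tag>',  # placeholder; use builder.image_tag
        command=['python', '/app/build-train-deploy.py', 'train'],
    )

# Compile the pipeline into an archive the Pipelines service can run ...
compiler.Compiler().compile(train_pipeline, 'train_pipeline.tar.gz')

# ... then group the run under an experiment and submit it, mirroring the
# EXPERIMENT_NAME and arguments cells in the notebook.
client = kfp.Client()
experiment = client.create_experiment('MockupModel')
run = client.run_pipeline(experiment.id, 'mockup-train-run',  # run name is illustrative
                          'train_pipeline.tar.gz', params={})
```

Run from inside the Kubeflow notebook server, `kfp.Client()` with no arguments resolves the in-cluster Pipelines endpoint, which is why the notebook can omit a host.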