mirror of https://github.com/kubeflow/examples.git
1187 lines
46 KiB
Plaintext
1187 lines
46 KiB
Plaintext
{
|
|
"cells": [
|
|
{
|
|
"cell_type": "markdown",
|
|
"metadata": {},
|
|
"source": [
|
|
"# Train and deploy on Kubeflow from Notebooks\n",
|
|
"\n",
|
|
"This notebook introduces you to using Kubeflow Fairing to train and deploy a model to Kubeflow on Google Kubernetes Engine (GKE), and Kubeflow Pipelines to build a simple pipeline and deploy it on GKE. This notebook demonstrates how to:\n",
|
|
" \n",
|
|
"* Train an XGBoost model in a local notebook,\n",
|
|
"* Use Kubeflow Fairing to train an XGBoost model remotely on Kubeflow,\n",
|
|
" * For simplicity code-generated synthetic data is used.\n",
|
|
" * The append builder is used to rapidly build a docker image.\n",
|
|
"* Use Kubeflow Fairing to deploy a trained model to Kubeflow, and call the deployed endpoint for predictions.\n",
|
|
"* Use a simple pipeline to train a model in GKE. \n",
|
|
"\n",
|
|
"To learn more about how to run this notebook locally, see the guide to [training and deploying on GCP from a local notebook][gcp-local-notebook].\n",
|
|
"\n",
|
|
"[gcp-local-notebook]: https://kubeflow.org/docs/fairing/gcp/tutorials/gcp-local-notebook/"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"metadata": {},
|
|
"source": [
|
|
"## Set up your notebook for training an XGBoost model\n",
|
|
"\n",
|
|
"Import the libraries required to train this model."
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"!pip3 install retrying\n",
|
|
"!pip3 install fairing\n",
|
|
"!pip3 install kfmd"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 32,
|
|
"metadata": {
|
|
"scrolled": false
|
|
},
|
|
"outputs": [],
|
|
"source": [
|
|
"import util\n",
|
|
"from pathlib import Path\n",
|
|
"import os\n",
|
|
"\n",
|
|
"util.notebook_setup()\n"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 33,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"# fairing:include-cell\n",
|
|
"import fire\n",
|
|
"import joblib\n",
|
|
"import logging\n",
|
|
"import kfmd\n",
|
|
"import nbconvert\n",
|
|
"import os\n",
|
|
"import pathlib\n",
|
|
"import sys\n",
|
|
"from pathlib import Path\n",
|
|
"import pandas as pd\n",
|
|
"import pprint\n",
|
|
"from sklearn.metrics import mean_absolute_error\n",
|
|
"from sklearn.model_selection import train_test_split\n",
|
|
"from sklearn.impute import SimpleImputer\n",
|
|
"from xgboost import XGBRegressor\n",
|
|
"from importlib import reload\n",
|
|
"from sklearn.datasets import make_regression\n",
|
|
"from kfmd import metadata\n",
|
|
"from datetime import datetime\n"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 34,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"# Imports not to be included in the built docker image\n",
|
|
"import kfp\n",
|
|
"import kfp.components as comp\n",
|
|
"import kfp.gcp as gcp\n",
|
|
"import kfp.dsl as dsl\n",
|
|
"import kfp.compiler as compiler\n",
|
|
"from kubernetes import client as k8s_client\n",
|
|
"import fairing \n",
|
|
"from fairing.builders import append\n",
|
|
"from fairing.deployers import job\n",
|
|
"from fairing.preprocessors.converted_notebook import ConvertNotebookPreprocessorWithFire\n"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 35,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"# fairing:include-cell\n",
|
|
"def read_synthetic_input(test_size=0.25):\n",
|
|
" \"\"\"generate synthetic data and split it into train and test.\"\"\"\n",
|
|
" # generate regression dataset\n",
|
|
" X, y = make_regression(n_samples=200, n_features=5, noise=0.1)\n",
|
|
" train_X, test_X, train_y, test_y = train_test_split(X,\n",
|
|
" y,\n",
|
|
" test_size=test_size,\n",
|
|
" shuffle=False)\n",
|
|
"\n",
|
|
" imputer = SimpleImputer()\n",
|
|
" train_X = imputer.fit_transform(train_X)\n",
|
|
" test_X = imputer.transform(test_X)\n",
|
|
"\n",
|
|
" return (train_X, train_y), (test_X, test_y)\n"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 36,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"# fairing:include-cell\n",
|
|
"def train_model(train_X,\n",
|
|
"                train_y,\n",
|
|
"                test_X,\n",
|
|
"                test_y,\n",
|
|
"                n_estimators,\n",
|
|
"                learning_rate):\n",
|
|
"    \"\"\"Train an XGBRegressor and report its best evaluation score.\n",
|
|
"\n",
|
|
"    The (test_X, test_y) split is passed as the evaluation set; training\n",
|
|
"    stops early once the evaluation metric has not improved for 40 rounds.\n",
|
|
"\n",
|
|
"    Args:\n",
|
|
"      train_X, train_y: training features and targets.\n",
|
|
"      test_X, test_y: evaluation features and targets.\n",
|
|
"      n_estimators: forwarded to XGBRegressor as n_estimators.\n",
|
|
"      learning_rate: forwarded to XGBRegressor as learning_rate.\n",
|
|
"\n",
|
|
"    Returns:\n",
|
|
"      The fitted XGBRegressor.\n",
|
|
"    \"\"\"\n",
|
|
"    model = XGBRegressor(n_estimators=n_estimators, learning_rate=learning_rate)\n",
|
|
"\n",
|
|
"    model.fit(train_X,\n",
|
|
"              train_y,\n",
|
|
"              early_stopping_rounds=40,\n",
|
|
"              eval_set=[(test_X, test_y)])\n",
|
|
"\n",
|
|
"    # print() does not apply %-style arguments the way logging calls do,\n",
|
|
"    # so the message is formatted explicitly here.\n",
|
|
"    print(\"Best RMSE on eval: %.2f with %d rounds\" % (\n",
|
|
"        model.best_score,\n",
|
|
"        model.best_iteration+1))\n",
|
|
"    return model\n",
|
|
"\n",
|
|
"def eval_model(model, test_X, test_y):\n",
|
|
" \"\"\"Evaluate the model performance.\"\"\"\n",
|
|
" predictions = model.predict(test_X)\n",
|
|
" mae=mean_absolute_error(predictions, test_y)\n",
|
|
" logging.info(\"mean_absolute_error=%.2f\", mae)\n",
|
|
" return mae\n",
|
|
"\n",
|
|
"def save_model(model, model_file):\n",
|
|
" \"\"\"Save XGBoost model for serving.\"\"\"\n",
|
|
" joblib.dump(model, model_file)\n",
|
|
" logging.info(\"Model export success: %s\", model_file)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"metadata": {},
|
|
"source": [
|
|
"Define various constants"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"metadata": {},
|
|
"source": [
|
|
"## Define Train and Predict functions"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 37,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"# fairing:include-cell\n",
|
|
"class ModelServe(object):\n",
|
|
" \n",
|
|
" def __init__(self, model_file=None):\n",
|
|
" self.n_estimators = 50\n",
|
|
" self.learning_rate = 0.1\n",
|
|
" if not model_file:\n",
|
|
" if \"MODEL_FILE\" in os.environ:\n",
|
|
" print(\"model_file not supplied; checking environment variable\")\n",
|
|
" model_file = os.getenv(\"MODEL_FILE\")\n",
|
|
" else:\n",
|
|
" print(\"model_file not supplied; using the default\")\n",
|
|
" model_file = \"mockup-model.dat\"\n",
|
|
" \n",
|
|
" self.model_file = model_file\n",
|
|
" print(\"model_file={0}\".format(self.model_file))\n",
|
|
" \n",
|
|
" self.model = None\n",
|
|
" self.exec = self.create_execution()\n",
|
|
"\n",
|
|
" def train(self):\n",
|
|
" (train_X, train_y), (test_X, test_y) = read_synthetic_input()\n",
|
|
" self.exec.log_input(metadata.DataSet(\n",
|
|
" description=\"xgboost synthetic data\",\n",
|
|
" name=\"synthetic-data\",\n",
|
|
" owner=\"someone@kubeflow.org\",\n",
|
|
" uri=\"file://path/to/dataset\",\n",
|
|
" version=\"v1.0.0\"))\n",
|
|
" \n",
|
|
" model = train_model(train_X,\n",
|
|
" train_y,\n",
|
|
" test_X,\n",
|
|
" test_y,\n",
|
|
" self.n_estimators,\n",
|
|
" self.learning_rate)\n",
|
|
"\n",
|
|
" mae = eval_model(model, test_X, test_y)\n",
|
|
" self.exec.log_output(metadata.Metrics(\n",
|
|
" name=\"xgboost-synthetic-traing-eval\",\n",
|
|
" owner=\"someone@kubeflow.org\",\n",
|
|
" description=\"training evaluation for xgboost synthetic\",\n",
|
|
" uri=\"gcs://path/to/metrics\",\n",
|
|
" metrics_type=metadata.Metrics.VALIDATION,\n",
|
|
" values={\"mean_absolute_error\": mae}))\n",
|
|
" \n",
|
|
" save_model(model, self.model_file)\n",
|
|
" self.exec.log_output(metadata.Model(\n",
|
|
" name=\"housing-price-model\",\n",
|
|
" description=\"housing price prediction model using synthetic data\",\n",
|
|
" owner=\"someone@kubeflow.org\",\n",
|
|
" uri=self.model_file,\n",
|
|
" model_type=\"linear_regression\",\n",
|
|
" training_framework={\n",
|
|
" \"name\": \"xgboost\",\n",
|
|
" \"version\": \"0.9.0\"\n",
|
|
" },\n",
|
|
" hyperparameters={\n",
|
|
" \"learning_rate\": self.learning_rate,\n",
|
|
" \"n_estimators\": self.n_estimators\n",
|
|
" },\n",
|
|
" version=datetime.utcnow().isoformat(\"T\")))\n",
|
|
" \n",
|
|
" def predict(self, X, feature_names):\n",
|
|
" \"\"\"Predict using the model for given ndarray.\"\"\"\n",
|
|
" if not self.model:\n",
|
|
" self.model = joblib.load(self.model_file)\n",
|
|
" # Do any preprocessing\n",
|
|
" prediction = self.model.predict(data=X)\n",
|
|
" # Do any postprocessing\n",
|
|
" return [[prediction.item(0), prediction.item(1)]]\n",
|
|
" \n",
|
|
" def create_execution(self):\n",
|
|
" workspace = metadata.Workspace(\n",
|
|
" # Connect to metadata-service in namespace kubeflow in k8s cluster.\n",
|
|
" backend_url_prefix=\"metadata-service.kubeflow:8080\",\n",
|
|
" name=\"xgboost-synthetic\",\n",
|
|
" description=\"workspace for xgboost-synthetic artifacts and executions\")\n",
|
|
" \n",
|
|
" r = metadata.Run(\n",
|
|
" workspace=workspace,\n",
|
|
" name=\"xgboost-synthetic-faring-run\" + datetime.utcnow().isoformat(\"T\"),\n",
|
|
" description=\"a notebook run\")\n",
|
|
"\n",
|
|
" return metadata.Execution(\n",
|
|
" name = \"execution\" + datetime.utcnow().isoformat(\"T\"),\n",
|
|
" workspace=workspace,\n",
|
|
" run=r,\n",
|
|
" description=\"execution for training xgboost-synthetic\")"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"metadata": {},
|
|
"source": [
|
|
"## Train your Model Locally\n",
|
|
"\n",
|
|
"* Train your model locally inside your notebook"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 39,
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"name": "stdout",
|
|
"output_type": "stream",
|
|
"text": [
|
|
"model_file=mockup-model.dat\n",
|
|
"[0]\tvalidation_0-rmse:145.743\n",
|
|
"Will train until validation_0-rmse hasn't improved in 40 rounds.\n",
|
|
"[1]\tvalidation_0-rmse:137.786\n",
|
|
"[2]\tvalidation_0-rmse:129.221\n",
|
|
"[3]\tvalidation_0-rmse:122.795\n",
|
|
"[4]\tvalidation_0-rmse:117.913\n",
|
|
"[5]\tvalidation_0-rmse:113.441\n",
|
|
"[6]\tvalidation_0-rmse:108.843\n",
|
|
"[7]\tvalidation_0-rmse:104.968\n",
|
|
"[8]\tvalidation_0-rmse:101.756\n",
|
|
"[9]\tvalidation_0-rmse:98.9659\n",
|
|
"[10]\tvalidation_0-rmse:96.2215\n",
|
|
"[11]\tvalidation_0-rmse:93.6806\n",
|
|
"[12]\tvalidation_0-rmse:90.5423\n",
|
|
"[13]\tvalidation_0-rmse:88.1216\n",
|
|
"[14]\tvalidation_0-rmse:85.4835\n",
|
|
"[15]\tvalidation_0-rmse:83.1785\n",
|
|
"[16]\tvalidation_0-rmse:80.9087\n",
|
|
"[17]\tvalidation_0-rmse:78.916\n",
|
|
"[18]\tvalidation_0-rmse:77.5187\n",
|
|
"[19]\tvalidation_0-rmse:75.0274\n",
|
|
"[20]\tvalidation_0-rmse:74.0297\n",
|
|
"[21]\tvalidation_0-rmse:72.1579\n",
|
|
"[22]\tvalidation_0-rmse:70.6119\n",
|
|
"[23]\tvalidation_0-rmse:69.7389\n",
|
|
"[24]\tvalidation_0-rmse:67.9469\n",
|
|
"[25]\tvalidation_0-rmse:66.8921\n",
|
|
"[26]\tvalidation_0-rmse:66.1554\n",
|
|
"[27]\tvalidation_0-rmse:64.6994\n",
|
|
"[28]\tvalidation_0-rmse:63.5188\n",
|
|
"[29]\tvalidation_0-rmse:62.7831\n",
|
|
"[30]\tvalidation_0-rmse:62.3533\n",
|
|
"[31]\tvalidation_0-rmse:61.9013\n",
|
|
"[32]\tvalidation_0-rmse:60.8512\n",
|
|
"[33]\tvalidation_0-rmse:60.1541\n",
|
|
"[34]\tvalidation_0-rmse:59.5948\n",
|
|
"[35]\tvalidation_0-rmse:59.0876\n",
|
|
"[36]\tvalidation_0-rmse:58.6049\n",
|
|
"[37]\tvalidation_0-rmse:58.2507\n",
|
|
"[38]\tvalidation_0-rmse:57.4195\n",
|
|
"[39]\tvalidation_0-rmse:57.0364\n",
|
|
"[40]\tvalidation_0-rmse:56.634\n",
|
|
"[41]\tvalidation_0-rmse:56.279\n",
|
|
"[42]\tvalidation_0-rmse:56.1874\n",
|
|
"[43]\tvalidation_0-rmse:55.5723\n",
|
|
"[44]\tvalidation_0-rmse:55.4855\n",
|
|
"[45]\tvalidation_0-rmse:54.8205\n",
|
|
"[46]\tvalidation_0-rmse:54.663\n",
|
|
"[47]\tvalidation_0-rmse:54.1199\n",
|
|
"[48]\tvalidation_0-rmse:53.8837\n",
|
|
"[49]\tvalidation_0-rmse:53.6094\n"
|
|
]
|
|
},
|
|
{
|
|
"name": "stderr",
|
|
"output_type": "stream",
|
|
"text": [
|
|
"mean_absolute_error=41.16\n",
|
|
"Model export success: mockup-model.dat\n"
|
|
]
|
|
},
|
|
{
|
|
"name": "stdout",
|
|
"output_type": "stream",
|
|
"text": [
|
|
"Best RMSE on eval: %.2f with %d rounds 53.609386 50\n"
|
|
]
|
|
}
|
|
],
|
|
"source": [
|
|
"ModelServe(model_file=\"mockup-model.dat\").train()"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"metadata": {},
|
|
"source": [
|
|
"## Predict locally\n",
|
|
"\n",
|
|
"* Run prediction inside the notebook using the newly trained model"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 8,
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"name": "stdout",
|
|
"output_type": "stream",
|
|
"text": [
|
|
"model_file not supplied; using the default\n",
|
|
"model_file=mockup-model.dat\n",
|
|
"[14:45:28] WARNING: /workspace/src/objective/regression_obj.cu:152: reg:linear is now deprecated in favor of reg:squarederror.\n"
|
|
]
|
|
},
|
|
{
|
|
"data": {
|
|
"text/plain": [
|
|
"[[68.33491516113281, 68.33491516113281]]"
|
|
]
|
|
},
|
|
"execution_count": 8,
|
|
"metadata": {},
|
|
"output_type": "execute_result"
|
|
}
|
|
],
|
|
"source": [
|
|
"(train_X, train_y), (test_X, test_y) =read_synthetic_input()\n",
|
|
"\n",
|
|
"ModelServe().predict(test_X, None)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"metadata": {},
|
|
"source": [
|
|
"## Use Fairing to Launch a K8s Job to train your model"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"metadata": {},
|
|
"source": [
|
|
"### Set up Kubeflow Fairing for training and predictions\n",
|
|
"\n",
|
|
"Import the `fairing` library and configure the environment that your training or prediction job will run in."
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 9,
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"name": "stdout",
|
|
"output_type": "stream",
|
|
"text": [
|
|
"issue-label-bot-dev\n",
|
|
"gcr.io/issue-label-bot-dev/fairing-job\n"
|
|
]
|
|
}
|
|
],
|
|
"source": [
|
|
"# Setting up google container repositories (GCR) for storing output containers\n",
|
|
"# You can use any docker container registry instead of GCR\n",
|
|
"GCP_PROJECT = fairing.cloud.gcp.guess_project_name()\n",
|
|
"print(GCP_PROJECT)\n",
|
|
"DOCKER_REGISTRY = 'gcr.io/{}/fairing-job'.format(GCP_PROJECT)\n",
|
|
"print(DOCKER_REGISTRY)\n",
|
|
"PY_VERSION = \".\".join([str(x) for x in sys.version_info[0:3]])\n",
|
|
"BASE_IMAGE = 'python:{}'.format(PY_VERSION)\n",
|
|
"# You can use the Dockerfile in this repo to build and use the base_image\n",
|
|
"base_image = \"gcr.io/kubeflow-images-public/xgboost-fairing-example-base:v-20190612\"\n"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"metadata": {},
|
|
"source": [
|
|
"## Use fairing to build the docker image\n",
|
|
"\n",
|
|
"* This uses the append builder to rapidly build docker images"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 10,
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"data": {
|
|
"text/plain": [
|
|
"[PosixPath('build-train-deploy.py'), 'xgboost_util.py', 'mockup-model.dat']"
|
|
]
|
|
},
|
|
"execution_count": 10,
|
|
"metadata": {},
|
|
"output_type": "execute_result"
|
|
}
|
|
],
|
|
"source": [
|
|
"from fairing.builders import cluster\n",
|
|
"preprocessor = ConvertNotebookPreprocessorWithFire(\"ModelServe\")\n",
|
|
"\n",
|
|
"if not preprocessor.input_files:\n",
|
|
" preprocessor.input_files = set()\n",
|
|
"input_files=[\"xgboost_util.py\", \"mockup-model.dat\"]\n",
|
|
"preprocessor.input_files = set([os.path.normpath(f) for f in input_files])\n",
|
|
"preprocessor.preprocess()"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 11,
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"name": "stderr",
|
|
"output_type": "stream",
|
|
"text": [
|
|
"Building image using cluster builder.\n",
|
|
"Creating docker context: /tmp/fairing_context_5d629kor\n",
|
|
"Waiting for fairing-builder-lz9zx to start...\n",
|
|
"Pod started running True\n"
|
|
]
|
|
},
|
|
{
|
|
"name": "stdout",
|
|
"output_type": "stream",
|
|
"text": [
|
|
"\u001b[36mINFO\u001b[0m[0000] Downloading base image gcr.io/kubeflow-images-public/xgboost-fairing-example-base:v-20190612\n",
|
|
"\u001b[36mINFO\u001b[0m[0000] Downloading base image gcr.io/kubeflow-images-public/xgboost-fairing-example-base:v-20190612\n",
|
|
"\u001b[33mWARN\u001b[0m[0000] Error while retrieving image from cache: getting image from path: open /cache/sha256:f90e54e312c4cfba28bec6993add2a85b4e127b52149ec0aaf41e5f8889a4086: no such file or directory\n",
|
|
"\u001b[36mINFO\u001b[0m[0000] Checking for cached layer gcr.io/issue-label-bot-dev/fairing-job/fairing-job/cache:e46cfa04f5f0d0445ce3ce8b91886d94e96f2875510a69aa9afaeb0ba9e62fc4...\n",
|
|
"\u001b[36mINFO\u001b[0m[0000] Using caching version of cmd: RUN if [ -e requirements.txt ];then pip install --no-cache -r requirements.txt; fi\n",
|
|
"\u001b[36mINFO\u001b[0m[0000] Using files from context: [/kaniko/buildcontext/app]\n",
|
|
"\u001b[36mINFO\u001b[0m[0000] Taking snapshot of full filesystem...\n",
|
|
"\u001b[36mINFO\u001b[0m[0000] Skipping paths under /dev, as it is a whitelisted directory\n",
|
|
"\u001b[36mINFO\u001b[0m[0000] Skipping paths under /etc/secrets, as it is a whitelisted directory\n",
|
|
"\u001b[36mINFO\u001b[0m[0000] Skipping paths under /kaniko, as it is a whitelisted directory\n",
|
|
"\u001b[36mINFO\u001b[0m[0000] Skipping paths under /proc, as it is a whitelisted directory\n",
|
|
"\u001b[36mINFO\u001b[0m[0000] Skipping paths under /sys, as it is a whitelisted directory\n",
|
|
"\u001b[36mINFO\u001b[0m[0000] Skipping paths under /var/run, as it is a whitelisted directory\n",
|
|
"\u001b[36mINFO\u001b[0m[0000] WORKDIR /app/\n",
|
|
"\u001b[36mINFO\u001b[0m[0000] cmd: workdir\n",
|
|
"\u001b[36mINFO\u001b[0m[0000] Changed working directory to /app/\n",
|
|
"\u001b[36mINFO\u001b[0m[0000] Creating directory /app/\n",
|
|
"\u001b[36mINFO\u001b[0m[0000] Taking snapshot of files...\n",
|
|
"\u001b[36mINFO\u001b[0m[0000] ENV FAIRING_RUNTIME 1\n",
|
|
"\u001b[36mINFO\u001b[0m[0000] No files changed in this command, skipping snapshotting.\n",
|
|
"\u001b[36mINFO\u001b[0m[0000] RUN if [ -e requirements.txt ];then pip install --no-cache -r requirements.txt; fi\n",
|
|
"\u001b[36mINFO\u001b[0m[0000] Found cached layer, extracting to filesystem\n",
|
|
"\u001b[36mINFO\u001b[0m[0001] No files changed in this command, skipping snapshotting.\n",
|
|
"\u001b[36mINFO\u001b[0m[0001] Using files from context: [/kaniko/buildcontext/app]\n",
|
|
"\u001b[36mINFO\u001b[0m[0001] COPY /app/ /app/\n",
|
|
"\u001b[36mINFO\u001b[0m[0001] Taking snapshot of files...\n",
|
|
"2019/07/18 21:45:45 existing blob: sha256:d13453f7d2b8d0adfd86c3989a5b695cef5afc3efaafe559643071f258c9f06d\n",
|
|
"2019/07/18 21:45:45 existing blob: sha256:0ba512db704a2eb85f7f372d1c809d58589531e3bae794f0aaba86cee912f923\n",
|
|
"2019/07/18 21:45:45 existing blob: sha256:9ee379bde91a3cecfb08d4189af0a2bcecc2da1c5102e49443088ccd7bd9abfa\n",
|
|
"2019/07/18 21:45:45 existing blob: sha256:507170ae8cfaca6cf2999295221d1324f1051fa15ba59e04dd7dafdc8de565bc\n",
|
|
"2019/07/18 21:45:45 existing blob: sha256:2f1ee468081da0ca09360c50281ed261d8b3fb01f664262c3f278d8619eb4e9a\n",
|
|
"2019/07/18 21:45:45 existing blob: sha256:d099b15c53311dc296426716edabe61dcc19e88009c19098b17ba965357c4391\n",
|
|
"2019/07/18 21:45:45 existing blob: sha256:bad6918fba4b1c68f82d1a4b6063b3ce64975a73b33b38b35454b1d484a6b57b\n",
|
|
"2019/07/18 21:45:45 existing blob: sha256:0fd02182c40eb28e13c4d7efd5dd4c81d985d9b07c9c809cc26e7bdb2dced07e\n",
|
|
"2019/07/18 21:45:45 existing blob: sha256:079dd3e30fa3eed702bb20a2f725da9907e2732bdc4dfb2fb5084a3423c3f743\n",
|
|
"2019/07/18 21:45:45 existing blob: sha256:e7fea64fabbc6f5961864ce5c6bcc143ab616d325b0c5a26848d8e427806104f\n",
|
|
"2019/07/18 21:45:45 existing blob: sha256:a5ba9de0ac70b35658f5898c27b52063a597d790308fb853021e881e04a6efb7\n",
|
|
"2019/07/18 21:45:45 existing blob: sha256:124c757242f88002a858c23fc79f8262f9587fa30fd92507e586ad074afb42b6\n",
|
|
"2019/07/18 21:45:45 existing blob: sha256:bbf0f5f91e8108d9b0be1ceeb749e63788ce7394a184bc8a70d24017eca7b7ba\n",
|
|
"2019/07/18 21:45:45 existing blob: sha256:9d866f8bde2a0d607a6d17edc0fbd5e00b58306efc2b0a57e0ba72f269e7c6be\n",
|
|
"2019/07/18 21:45:45 existing blob: sha256:afde35469481d2bc446d649a7a3d099147bbf7696b66333e76a411686b617ea1\n",
|
|
"2019/07/18 21:45:45 existing blob: sha256:398d32b153e84fe343f0c5b07d65e89b05551aae6cb8b3a03bb2b662976eb3b8\n",
|
|
"2019/07/18 21:45:45 existing blob: sha256:55dbf73eb7c7c005c3ccff29b62ff180e2f29245d14794dd6d5d8ad855d0ea88\n",
|
|
"2019/07/18 21:45:45 existing blob: sha256:4bfa6a63a3897359eff3ca3ee27c2e05ba76b790a07e6583714c1d324c2d4f21\n",
|
|
"2019/07/18 21:45:45 existing blob: sha256:5d8a6f34a39a1e098f09b39ee4e9d4a178fef6ec71c2046fe0b040c4667c8143\n",
|
|
"2019/07/18 21:45:45 existing blob: sha256:b893ca5fa31bb87be0d3fa3a403dac7ca12c955d6fd522fd35e3260dbd0e99da\n",
|
|
"2019/07/18 21:45:45 existing blob: sha256:ecc17173ccb5b7692a6d31b0077b8e4f543fb45f8c2b5c252dcad9ad0c9be0f7\n",
|
|
"2019/07/18 21:45:45 existing blob: sha256:eed14867f5ee443ad7efc89d0d4392683799a413244feec120f43074bc2d43ef\n",
|
|
"2019/07/18 21:45:45 existing blob: sha256:07e06c833ecb3b115e378d7f2ba5817ba77cfd02f5794a9817ede0622fbbf8a5\n",
|
|
"2019/07/18 21:45:45 existing blob: sha256:541a15d3a9d79f7d3e5e0f552f396406b3e3093247f71e0ae71dd8b7242ec428\n",
|
|
"2019/07/18 21:45:45 existing blob: sha256:fa3f2f277e67c5cbbf1dac21dc27111a60d3cd2ef494d94aa1515d3319f2a245\n",
|
|
"2019/07/18 21:45:45 existing blob: sha256:8143617e89d7ba1957e3dc6d7093a48bd0cd4a2a709bc0c9d0ffc6dde11467e8\n",
|
|
"2019/07/18 21:45:45 existing blob: sha256:2327f2e2474891211dbf7fb2d54e16e7b2889fea157b726645cc05e75ad917e8\n",
|
|
"2019/07/18 21:45:45 existing blob: sha256:8c58e650bb886ab24426958165c15abe1a1c10e8710f50233701fd503e23e7ac\n",
|
|
"2019/07/18 21:45:45 existing blob: sha256:90a7e2cb4d7460e55f83c6e47f9f8d089895ee6e1cc51ae5c23eab3bdcb70363\n",
|
|
"2019/07/18 21:45:45 existing blob: sha256:1cf84c00b8903926c231b4b5974c0419556a4a578bf9416f585fcbf1b7aa70ab\n",
|
|
"2019/07/18 21:45:46 pushed blob sha256:8ab941f264e893bf2d02a0f6d2972fa5f725995cba85b0a897cee1531525bba1\n",
|
|
"2019/07/18 21:45:46 pushed blob sha256:acb611ba3316584866914521fe68dd9892e3fea865900f7c15f2f7268587cd93\n",
|
|
"2019/07/18 21:45:46 pushed blob sha256:80794aeb9ef80da69469ae895f20899b52d9115e4161543c83774863e97fc507\n",
|
|
"2019/07/18 21:45:47 gcr.io/issue-label-bot-dev/fairing-job/fairing-job:E480ACAF: digest: sha256:1c10c3629d920b78e54f16fe268eb77f976d1ff5a48b31a9f54df478ff012a2a size: 5468\n"
|
|
]
|
|
}
|
|
],
|
|
"source": [
|
|
"cluster_builder = cluster.cluster.ClusterBuilder(registry=DOCKER_REGISTRY,\n",
|
|
" base_image=base_image,\n",
|
|
" namespace='kubeflow',\n",
|
|
" preprocessor=preprocessor,\n",
|
|
" pod_spec_mutators=[fairing.cloud.gcp.add_gcp_credentials_if_exists],\n",
|
|
" context_source=cluster.gcs_context.GCSContextSource())\n",
|
|
"cluster_builder.build()"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 12,
|
|
"metadata": {
|
|
"scrolled": true
|
|
},
|
|
"outputs": [
|
|
{
|
|
"name": "stderr",
|
|
"output_type": "stream",
|
|
"text": [
|
|
"Building image using Append builder...\n",
|
|
"Creating docker context: /tmp/fairing_context_xpzlon_h\n",
|
|
"build-train-deploy.py already exists in Fairing context, skipping...\n",
|
|
"Loading Docker credentials for repository 'gcr.io/issue-label-bot-dev/fairing-job/fairing-job:E480ACAF'\n",
|
|
"Invoking 'docker-credential-gcloud' to obtain Docker credentials.\n",
|
|
"Successfully obtained Docker credentials.\n",
|
|
"Image successfully built in 1.2515304939588532s.\n",
|
|
"Pushing image gcr.io/issue-label-bot-dev/fairing-job/fairing-job:DA1D5CB0...\n",
|
|
"Loading Docker credentials for repository 'gcr.io/issue-label-bot-dev/fairing-job/fairing-job:DA1D5CB0'\n",
|
|
"Invoking 'docker-credential-gcloud' to obtain Docker credentials.\n",
|
|
"Successfully obtained Docker credentials.\n",
|
|
"Uploading gcr.io/issue-label-bot-dev/fairing-job/fairing-job:DA1D5CB0\n",
|
|
"Layer sha256:9d866f8bde2a0d607a6d17edc0fbd5e00b58306efc2b0a57e0ba72f269e7c6be exists, skipping\n",
|
|
"Layer sha256:124c757242f88002a858c23fc79f8262f9587fa30fd92507e586ad074afb42b6 exists, skipping\n",
|
|
"Layer sha256:bbf0f5f91e8108d9b0be1ceeb749e63788ce7394a184bc8a70d24017eca7b7ba exists, skipping\n",
|
|
"Layer sha256:e7fea64fabbc6f5961864ce5c6bcc143ab616d325b0c5a26848d8e427806104f exists, skipping\n",
|
|
"Layer sha256:d099b15c53311dc296426716edabe61dcc19e88009c19098b17ba965357c4391 exists, skipping\n",
|
|
"Layer sha256:079dd3e30fa3eed702bb20a2f725da9907e2732bdc4dfb2fb5084a3423c3f743 exists, skipping\n",
|
|
"Layer sha256:80794aeb9ef80da69469ae895f20899b52d9115e4161543c83774863e97fc507 exists, skipping\n",
|
|
"Layer sha256:eed14867f5ee443ad7efc89d0d4392683799a413244feec120f43074bc2d43ef exists, skipping\n",
|
|
"Layer sha256:55dbf73eb7c7c005c3ccff29b62ff180e2f29245d14794dd6d5d8ad855d0ea88 exists, skipping\n",
|
|
"Layer sha256:8ab941f264e893bf2d02a0f6d2972fa5f725995cba85b0a897cee1531525bba1 exists, skipping\n",
|
|
"Layer sha256:2327f2e2474891211dbf7fb2d54e16e7b2889fea157b726645cc05e75ad917e8 exists, skipping\n",
|
|
"Layer sha256:fa3f2f277e67c5cbbf1dac21dc27111a60d3cd2ef494d94aa1515d3319f2a245 exists, skipping\n",
|
|
"Layer sha256:afde35469481d2bc446d649a7a3d099147bbf7696b66333e76a411686b617ea1 exists, skipping\n",
|
|
"Layer sha256:d13453f7d2b8d0adfd86c3989a5b695cef5afc3efaafe559643071f258c9f06d exists, skipping\n",
|
|
"Layer sha256:2f1ee468081da0ca09360c50281ed261d8b3fb01f664262c3f278d8619eb4e9a exists, skipping\n",
|
|
"Layer sha256:8c58e650bb886ab24426958165c15abe1a1c10e8710f50233701fd503e23e7ac exists, skipping\n",
|
|
"Layer sha256:507170ae8cfaca6cf2999295221d1324f1051fa15ba59e04dd7dafdc8de565bc exists, skipping\n",
|
|
"Layer sha256:b893ca5fa31bb87be0d3fa3a403dac7ca12c955d6fd522fd35e3260dbd0e99da exists, skipping\n",
|
|
"Layer sha256:8143617e89d7ba1957e3dc6d7093a48bd0cd4a2a709bc0c9d0ffc6dde11467e8 exists, skipping\n",
|
|
"Layer sha256:1cf84c00b8903926c231b4b5974c0419556a4a578bf9416f585fcbf1b7aa70ab exists, skipping\n",
|
|
"Layer sha256:4bfa6a63a3897359eff3ca3ee27c2e05ba76b790a07e6583714c1d324c2d4f21 exists, skipping\n",
|
|
"Layer sha256:5d8a6f34a39a1e098f09b39ee4e9d4a178fef6ec71c2046fe0b040c4667c8143 exists, skipping\n",
|
|
"Layer sha256:0ba512db704a2eb85f7f372d1c809d58589531e3bae794f0aaba86cee912f923 exists, skipping\n",
|
|
"Layer sha256:a5ba9de0ac70b35658f5898c27b52063a597d790308fb853021e881e04a6efb7 exists, skipping\n",
|
|
"Layer sha256:bad6918fba4b1c68f82d1a4b6063b3ce64975a73b33b38b35454b1d484a6b57b exists, skipping\n",
|
|
"Layer sha256:0fd02182c40eb28e13c4d7efd5dd4c81d985d9b07c9c809cc26e7bdb2dced07e exists, skipping\n",
|
|
"Layer sha256:541a15d3a9d79f7d3e5e0f552f396406b3e3093247f71e0ae71dd8b7242ec428 exists, skipping\n",
|
|
"Layer sha256:ecc17173ccb5b7692a6d31b0077b8e4f543fb45f8c2b5c252dcad9ad0c9be0f7 exists, skipping\n",
|
|
"Layer sha256:07e06c833ecb3b115e378d7f2ba5817ba77cfd02f5794a9817ede0622fbbf8a5 exists, skipping\n",
|
|
"Layer sha256:9ee379bde91a3cecfb08d4189af0a2bcecc2da1c5102e49443088ccd7bd9abfa exists, skipping\n",
|
|
"Layer sha256:90a7e2cb4d7460e55f83c6e47f9f8d089895ee6e1cc51ae5c23eab3bdcb70363 exists, skipping\n",
|
|
"Layer sha256:398d32b153e84fe343f0c5b07d65e89b05551aae6cb8b3a03bb2b662976eb3b8 exists, skipping\n",
|
|
"Layer sha256:3885f9a80c70bf1aa3d3b925004fcca76334d45aa96d5e95412b40cae1dbdbba pushed.\n",
|
|
"Layer sha256:e94d45d512ce4033820c7df7dae67aa2d300528fed0ea5a53d6dcd099b2e4ca1 pushed.\n",
|
|
"Finished upload of: gcr.io/issue-label-bot-dev/fairing-job/fairing-job:DA1D5CB0\n",
|
|
"Pushed image gcr.io/issue-label-bot-dev/fairing-job/fairing-job:DA1D5CB0 in 3.6773080190178007s.\n"
|
|
]
|
|
}
|
|
],
|
|
"source": [
|
|
"builder = append.append.AppendBuilder(registry=DOCKER_REGISTRY,\n",
|
|
" base_image=cluster_builder.image_tag, preprocessor=preprocessor)\n",
|
|
"builder.build()\n"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"metadata": {},
|
|
"source": [
|
|
"## Launch the K8s Job\n",
|
|
"\n",
|
|
"* Use pod mutators to attach a PVC and credentials to the pod"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 19,
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"name": "stderr",
|
|
"output_type": "stream",
|
|
"text": [
|
|
"WARNING:fairing.deployers.job.job:Training job fairing-job-t429t launched.\n",
|
|
"WARNING:fairing.kubernetes.manager:Waiting for fairing-job-t429t-xscgt to start...\n",
|
|
"WARNING:fairing.kubernetes.manager:Waiting for fairing-job-t429t-xscgt to start...\n",
|
|
"WARNING:fairing.kubernetes.manager:Waiting for fairing-job-t429t-xscgt to start...\n",
|
|
"INFO:fairing.kubernetes.manager:Pod started running True\n"
|
|
]
|
|
},
|
|
{
|
|
"name": "stdout",
|
|
"output_type": "stream",
|
|
"text": [
|
|
"model_file not supplied; using the default\n",
|
|
"model_file=mockup-model.dat\n",
|
|
"[0]\tvalidation_0-rmse:90.6249\n",
|
|
"Will train until validation_0-rmse hasn't improved in 40 rounds.\n",
|
|
"[1]\tvalidation_0-rmse:85.3672\n",
|
|
"[2]\tvalidation_0-rmse:80.6077\n",
|
|
"[3]\tvalidation_0-rmse:75.9867\n",
|
|
"[4]\tvalidation_0-rmse:72.15\n",
|
|
"[5]\tvalidation_0-rmse:68.4247\n",
|
|
"[6]\tvalidation_0-rmse:65.4166\n",
|
|
"[7]\tvalidation_0-rmse:62.7606\n",
|
|
"[8]\tvalidation_0-rmse:60.1438\n",
|
|
"[9]\tvalidation_0-rmse:57.9401\n",
|
|
"[10]\tvalidation_0-rmse:55.8747\n",
|
|
"[11]\tvalidation_0-rmse:53.957\n",
|
|
"[12]\tvalidation_0-rmse:52.2249\n",
|
|
"[13]\tvalidation_0-rmse:50.556\n",
|
|
"[14]\tvalidation_0-rmse:49.2282\n",
|
|
"[15]\tvalidation_0-rmse:47.8585\n",
|
|
"[16]\tvalidation_0-rmse:46.6933\n",
|
|
"[17]\tvalidation_0-rmse:45.5335\n",
|
|
"[18]\tvalidation_0-rmse:44.3206\n",
|
|
"[19]\tvalidation_0-rmse:43.2371\n",
|
|
"[20]\tvalidation_0-rmse:42.5117\n",
|
|
"[21]\tvalidation_0-rmse:41.6298\n",
|
|
"[22]\tvalidation_0-rmse:40.9242\n",
|
|
"[23]\tvalidation_0-rmse:40.1302\n",
|
|
"[24]\tvalidation_0-rmse:39.4707\n",
|
|
"[25]\tvalidation_0-rmse:38.8031\n",
|
|
"[26]\tvalidation_0-rmse:38.3108\n",
|
|
"[27]\tvalidation_0-rmse:37.689\n",
|
|
"[28]\tvalidation_0-rmse:37.1699\n",
|
|
"[29]\tvalidation_0-rmse:36.5853\n",
|
|
"[30]\tvalidation_0-rmse:36.3127\n",
|
|
"[31]\tvalidation_0-rmse:35.8365\n",
|
|
"[32]\tvalidation_0-rmse:35.4656\n",
|
|
"[33]\tvalidation_0-rmse:35.2841\n",
|
|
"[34]\tvalidation_0-rmse:35.0051\n",
|
|
"[35]\tvalidation_0-rmse:34.611\n",
|
|
"[36]\tvalidation_0-rmse:34.2154\n",
|
|
"[37]\tvalidation_0-rmse:34.1117\n",
|
|
"[38]\tvalidation_0-rmse:33.9263\n",
|
|
"[39]\tvalidation_0-rmse:33.6358\n",
|
|
"[40]\tvalidation_0-rmse:33.4676\n",
|
|
"[41]\tvalidation_0-rmse:33.3131\n",
|
|
"[42]\tvalidation_0-rmse:33.1974\n",
|
|
"[43]\tvalidation_0-rmse:32.9947\n",
|
|
"[44]\tvalidation_0-rmse:32.9613\n",
|
|
"[45]\tvalidation_0-rmse:32.8286\n",
|
|
"[46]\tvalidation_0-rmse:32.6034\n",
|
|
"[47]\tvalidation_0-rmse:32.4865\n",
|
|
"[48]\tvalidation_0-rmse:32.334\n",
|
|
"[49]\tvalidation_0-rmse:32.1416\n",
|
|
"mean_absolute_error=18.60\n",
|
|
"Model export success: mockup-model.dat\n",
|
|
"Best RMSE on eval: %.2f with %d rounds 32.141602 50\n"
|
|
]
|
|
}
|
|
],
|
|
"source": [
|
|
"pod_spec = builder.generate_pod_spec()\n",
|
|
"NAMESPACE = \"user1\"\n",
|
|
"train_deployer = job.job.Job(namespace=NAMESPACE, \n",
|
|
" cleanup=False,\n",
|
|
" pod_spec_mutators=[\n",
|
|
" fairing.cloud.gcp.add_gcp_credentials_if_exists])\n",
|
|
"\n",
|
|
"# Add command line arguments\n",
|
|
"pod_spec.containers[0].command.extend([\"train\"])\n",
|
|
"result = train_deployer.deploy(pod_spec)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 20,
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"name": "stdout",
|
|
"output_type": "stream",
|
|
"text": [
|
|
"apiVersion: v1\r\n",
|
|
"items:\r\n",
|
|
"- apiVersion: batch/v1\r\n",
|
|
" kind: Job\r\n",
|
|
" metadata:\r\n",
|
|
" creationTimestamp: \"2019-06-12T20:21:53Z\"\r\n",
|
|
" generateName: fairing-job-\r\n",
|
|
" labels:\r\n",
|
|
" fairing-deployer: job\r\n",
|
|
" fairing-id: b7955e0a-8d4f-11e9-9207-96ec34699c76\r\n",
|
|
" name: fairing-job-t429t\r\n",
|
|
" namespace: user1\r\n",
|
|
" resourceVersion: \"7556018\"\r\n",
|
|
" selfLink: /apis/batch/v1/namespaces/user1/jobs/fairing-job-t429t\r\n",
|
|
" uid: b7b87f19-8d4f-11e9-b008-42010a8e01a5\r\n",
|
|
" spec:\r\n",
|
|
" backoffLimit: 0\r\n",
|
|
" completions: 1\r\n",
|
|
" parallelism: 1\r\n",
|
|
" selector:\r\n",
|
|
" matchLabels:\r\n",
|
|
" controller-uid: b7b87f19-8d4f-11e9-b008-42010a8e01a5\r\n",
|
|
" template:\r\n",
|
|
" metadata:\r\n",
|
|
" creationTimestamp: null\r\n",
|
|
" labels:\r\n",
|
|
" controller-uid: b7b87f19-8d4f-11e9-b008-42010a8e01a5\r\n",
|
|
" fairing-deployer: job\r\n",
|
|
" fairing-id: b7955e0a-8d4f-11e9-9207-96ec34699c76\r\n",
|
|
" job-name: fairing-job-t429t\r\n",
|
|
" name: fairing-deployer\r\n",
|
|
" spec:\r\n",
|
|
" containers:\r\n",
|
|
" - command:\r\n",
|
|
" - python\r\n",
|
|
" - /app/mockup-data-xgboost-build-train-deploy.py\r\n",
|
|
" - train\r\n",
|
|
" env:\r\n",
|
|
" - name: FAIRING_RUNTIME\r\n",
|
|
" value: \"1\"\r\n",
|
|
" - name: GOOGLE_APPLICATION_CREDENTIALS\r\n",
|
|
" value: /etc/secrets/user-gcp-sa.json\r\n",
|
|
" image: gcr.io/zahrakubeflowcodelab/fairing-job/fairing-job:6F63F28C\r\n",
|
|
" imagePullPolicy: IfNotPresent\r\n",
|
|
" name: fairing-job\r\n",
|
|
" resources: {}\r\n",
|
|
" securityContext:\r\n",
|
|
" runAsUser: 0\r\n",
|
|
" terminationMessagePath: /dev/termination-log\r\n",
|
|
" terminationMessagePolicy: File\r\n",
|
|
" volumeMounts:\r\n",
|
|
" - mountPath: /etc/secrets\r\n",
|
|
" name: user-gcp-sa\r\n",
|
|
" readOnly: true\r\n",
|
|
" workingDir: /app/\r\n",
|
|
" dnsPolicy: ClusterFirst\r\n",
|
|
" restartPolicy: Never\r\n",
|
|
" schedulerName: default-scheduler\r\n",
|
|
" securityContext: {}\r\n",
|
|
" terminationGracePeriodSeconds: 30\r\n",
|
|
" volumes:\r\n",
|
|
" - name: user-gcp-sa\r\n",
|
|
" secret:\r\n",
|
|
" defaultMode: 420\r\n",
|
|
" secretName: user-gcp-sa\r\n",
|
|
" status:\r\n",
|
|
" completionTime: \"2019-06-12T20:22:00Z\"\r\n",
|
|
" conditions:\r\n",
|
|
" - lastProbeTime: \"2019-06-12T20:22:00Z\"\r\n",
|
|
" lastTransitionTime: \"2019-06-12T20:22:00Z\"\r\n",
|
|
" status: \"True\"\r\n",
|
|
" type: Complete\r\n",
|
|
" startTime: \"2019-06-12T20:21:53Z\"\r\n",
|
|
" succeeded: 1\r\n",
|
|
"kind: List\r\n",
|
|
"metadata:\r\n",
|
|
" resourceVersion: \"\"\r\n",
|
|
" selfLink: \"\"\r\n"
|
|
]
|
|
}
|
|
],
|
|
"source": [
|
|
"!kubectl get jobs -l fairing-id={train_deployer.job_id} -o yaml"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"metadata": {},
|
|
"source": [
|
|
"## Deploy the trained model to Kubeflow for predictions"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 21,
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"name": "stderr",
|
|
"output_type": "stream",
|
|
"text": [
|
|
"INFO:root:Cluster endpoint: http://fairing-service-jjgxd.user1.svc.cluster.local\n"
|
|
]
|
|
}
|
|
],
|
|
"source": [
|
|
"from fairing.deployers import serving\n",
|
|
"pod_spec = builder.generate_pod_spec()\n",
|
|
"\n",
|
|
"module_name = os.path.splitext(preprocessor.executable.name)[0]\n",
|
|
"deployer = serving.serving.Serving(module_name + \".ModelServe\",\n",
|
|
" service_type=\"ClusterIP\",\n",
|
|
" labels={\"app\": \"mockup\"})\n",
|
|
" \n",
|
|
"url = deployer.deploy(pod_spec)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 22,
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"name": "stdout",
|
|
"output_type": "stream",
|
|
"text": [
|
|
"apiVersion: extensions/v1beta1\r\n",
|
|
"kind: Deployment\r\n",
|
|
"metadata:\r\n",
|
|
" annotations:\r\n",
|
|
" deployment.kubernetes.io/revision: \"1\"\r\n",
|
|
" creationTimestamp: \"2019-06-12T20:22:27Z\"\r\n",
|
|
" generateName: fairing-deployer-\r\n",
|
|
" generation: 1\r\n",
|
|
" labels:\r\n",
|
|
" app: mockup\r\n",
|
|
" fairing-deployer: serving\r\n",
|
|
" fairing-id: cbc0e610-8d4f-11e9-9207-96ec34699c76\r\n",
|
|
" name: fairing-deployer-cltbb\r\n",
|
|
" namespace: user1\r\n",
|
|
" resourceVersion: \"7556174\"\r\n",
|
|
" selfLink: /apis/extensions/v1beta1/namespaces/user1/deployments/fairing-deployer-cltbb\r\n",
|
|
" uid: cbc54e8f-8d4f-11e9-b008-42010a8e01a5\r\n",
|
|
"spec:\r\n",
|
|
" progressDeadlineSeconds: 600\r\n",
|
|
" replicas: 1\r\n",
|
|
" revisionHistoryLimit: 10\r\n",
|
|
" selector:\r\n",
|
|
" matchLabels:\r\n",
|
|
" app: mockup\r\n",
|
|
" fairing-deployer: serving\r\n",
|
|
" fairing-id: cbc0e610-8d4f-11e9-9207-96ec34699c76\r\n",
|
|
" strategy:\r\n",
|
|
" rollingUpdate:\r\n",
|
|
" maxSurge: 25%\r\n",
|
|
" maxUnavailable: 25%\r\n",
|
|
" type: RollingUpdate\r\n",
|
|
" template:\r\n",
|
|
" metadata:\r\n",
|
|
" creationTimestamp: null\r\n",
|
|
" labels:\r\n",
|
|
" app: mockup\r\n",
|
|
" fairing-deployer: serving\r\n",
|
|
" fairing-id: cbc0e610-8d4f-11e9-9207-96ec34699c76\r\n",
|
|
" name: fairing-deployer\r\n",
|
|
" spec:\r\n",
|
|
" containers:\r\n",
|
|
" - command:\r\n",
|
|
" - seldon-core-microservice\r\n",
|
|
" - mockup-data-xgboost-build-train-deploy.ModelServe\r\n",
|
|
" - REST\r\n",
|
|
" - --service-type=MODEL\r\n",
|
|
" - --persistence=0\r\n",
|
|
" env:\r\n",
|
|
" - name: FAIRING_RUNTIME\r\n",
|
|
" value: \"1\"\r\n",
|
|
" image: gcr.io/zahrakubeflowcodelab/fairing-job/fairing-job:6F63F28C\r\n",
|
|
" imagePullPolicy: IfNotPresent\r\n",
|
|
" name: model\r\n",
|
|
" resources: {}\r\n",
|
|
" securityContext:\r\n",
|
|
" runAsUser: 0\r\n",
|
|
" terminationMessagePath: /dev/termination-log\r\n",
|
|
" terminationMessagePolicy: File\r\n",
|
|
" workingDir: /app/\r\n",
|
|
" dnsPolicy: ClusterFirst\r\n",
|
|
" restartPolicy: Always\r\n",
|
|
" schedulerName: default-scheduler\r\n",
|
|
" securityContext: {}\r\n",
|
|
" terminationGracePeriodSeconds: 30\r\n",
|
|
"status:\r\n",
|
|
" availableReplicas: 1\r\n",
|
|
" conditions:\r\n",
|
|
" - lastTransitionTime: \"2019-06-12T20:22:29Z\"\r\n",
|
|
" lastUpdateTime: \"2019-06-12T20:22:29Z\"\r\n",
|
|
" message: Deployment has minimum availability.\r\n",
|
|
" reason: MinimumReplicasAvailable\r\n",
|
|
" status: \"True\"\r\n",
|
|
" type: Available\r\n",
|
|
" - lastTransitionTime: \"2019-06-12T20:22:27Z\"\r\n",
|
|
" lastUpdateTime: \"2019-06-12T20:22:29Z\"\r\n",
|
|
" message: ReplicaSet \"fairing-deployer-cltbb-864d4d6f8f\" has successfully progressed.\r\n",
|
|
" reason: NewReplicaSetAvailable\r\n",
|
|
" status: \"True\"\r\n",
|
|
" type: Progressing\r\n",
|
|
" observedGeneration: 1\r\n",
|
|
" readyReplicas: 1\r\n",
|
|
" replicas: 1\r\n",
|
|
" updatedReplicas: 1\r\n"
|
|
]
|
|
}
|
|
],
|
|
"source": [
|
|
"!kubectl get deploy -o yaml {deployer.deployment.metadata.name}"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"metadata": {},
|
|
"source": [
|
|
"## Call the prediction endpoint\n",
|
|
"\n",
|
|
"Create a test dataset, then call the endpoint on Kubeflow for predictions."
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 23,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"(train_X, train_y), (test_X, test_y) =read_synthetic_input()\n"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 24,
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"name": "stdout",
|
|
"output_type": "stream",
|
|
"text": [
|
|
"(b'<!DOCTYPE HTML PUBLIC \"-//W3C//DTD HTML 3.2 Final//EN\">\\n<title>500 Inter'\n",
|
|
" b'nal Server Error</title>\\n<h1>Internal Server Error</h1>\\n<p>The server en'\n",
|
|
" b'countered an internal error and was unable to complete your request. Either '\n",
|
|
" b'the server is overloaded or there is an error in the application.</p>\\n')\n"
|
|
]
|
|
}
|
|
],
|
|
"source": [
|
|
"full_url = url + \":5000/predict\"\n",
|
|
"result = util.predict_nparray(full_url, test_X)\n",
|
|
"pprint.pprint(result.content)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"metadata": {},
|
|
"source": [
|
|
"## Clean up the prediction endpoint\n",
|
|
"\n",
|
|
"Delete the prediction endpoint created by this notebook."
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 33,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"# !kubectl delete service -l app=mockup\n",
|
|
"# !kubectl delete deploy -l app=mockup"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"metadata": {},
|
|
"source": [
|
|
"## Build a simple one-step pipeline"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 25,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"EXPERIMENT_NAME = 'MockupModel'"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"metadata": {},
|
|
"source": [
|
|
"#### Define the pipeline\n",
|
|
"The pipeline function must be decorated with the `@dsl.pipeline` decorator."
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 26,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"@dsl.pipeline(\n",
|
|
" name='Training pipeline',\n",
|
|
" description='A pipeline that trains an XGBoost model on code-generated synthetic data.'\n",
|
|
")\n",
|
|
"def train_pipeline(\n",
|
|
" ): \n",
|
|
" command=[\"python\", preprocessor.executable.name, \"train\"]\n",
|
|
" train_op = dsl.ContainerOp(\n",
|
|
" name=\"train\", \n",
|
|
" image=builder.image_tag, \n",
|
|
" command=command,\n",
|
|
" ).apply(\n",
|
|
" gcp.use_gcp_secret('user-gcp-sa'),\n",
|
|
" )\n",
|
|
" train_op.container.working_dir = \"/app\""
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"metadata": {},
|
|
"source": [
|
|
"#### Compile the pipeline"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 27,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"pipeline_func = train_pipeline\n",
|
|
"pipeline_filename = pipeline_func.__name__ + '.pipeline.zip'\n",
|
|
"compiler.Compiler().compile(pipeline_func, pipeline_filename)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"metadata": {},
|
|
"source": [
|
|
"#### Submit the pipeline for execution"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 28,
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"name": "stderr",
|
|
"output_type": "stream",
|
|
"text": [
|
|
"INFO:root:Creating experiment MockupModel.\n"
|
|
]
|
|
},
|
|
{
|
|
"data": {
|
|
"text/html": [
|
|
"Experiment link <a href=\"/pipeline/#/experiments/details/a446547d-14f7-4dae-935b-a3a66fceea44\" target=\"_blank\" >here</a>"
|
|
],
|
|
"text/plain": [
|
|
"<IPython.core.display.HTML object>"
|
|
]
|
|
},
|
|
"metadata": {},
|
|
"output_type": "display_data"
|
|
},
|
|
{
|
|
"data": {
|
|
"text/html": [
|
|
"Run link <a href=\"/pipeline/#/runs/details/d0778d1d-8d50-11e9-b008-42010a8e01a5\" target=\"_blank\" >here</a>"
|
|
],
|
|
"text/plain": [
|
|
"<IPython.core.display.HTML object>"
|
|
]
|
|
},
|
|
"metadata": {},
|
|
"output_type": "display_data"
|
|
}
|
|
],
|
|
"source": [
|
|
"#Specify pipeline argument values\n",
|
|
"arguments = {}\n",
|
|
"\n",
|
|
"# Get or create an experiment and submit a pipeline run\n",
|
|
"client = kfp.Client()\n",
|
|
"experiment = client.create_experiment(EXPERIMENT_NAME)\n",
|
|
"\n",
|
|
"#Submit a pipeline run\n",
|
|
"run_name = pipeline_func.__name__ + ' run'\n",
|
|
"run_result = client.run_pipeline(experiment.id, run_name, pipeline_filename, arguments)\n",
|
|
"\n",
|
|
"# The run link displayed below leads to the run information page. (Note: a bug in JupyterLab modifies the URL, which can make the link stop working.)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": []
|
|
}
|
|
],
|
|
"metadata": {
|
|
"kernelspec": {
|
|
"display_name": "Python 3",
|
|
"language": "python",
|
|
"name": "python3"
|
|
},
|
|
"language_info": {
|
|
"codemirror_mode": {
|
|
"name": "ipython",
|
|
"version": 3
|
|
},
|
|
"file_extension": ".py",
|
|
"mimetype": "text/x-python",
|
|
"name": "python",
|
|
"nbconvert_exporter": "python",
|
|
"pygments_lexer": "ipython3",
|
|
"version": "3.6.7"
|
|
}
|
|
},
|
|
"nbformat": 4,
|
|
"nbformat_minor": 2
|
|
}
|