From f9873e6ac4c818dc71ff1af8a823af40c54ae450 Mon Sep 17 00:00:00 2001 From: Sanyam Kapoor Date: Mon, 20 Aug 2018 16:35:07 -0700 Subject: [PATCH] Upgrade notebook commands and other relevant changes (#229) * Replace double quotes for field values (ks convention) * Recreate the ksonnet application from scratch * Fix pip commands to find requirements and redo installation, fix ks param set * Use sed replace instead of ks param set. * Add cells to first show JobSpec and then apply * Upgrade T2T, fix conflicting problem types * Update docker images * Reduce to 200k samples for vocab * Use Jupyter notebook service account * Add illustrative gsutil commands to show output files, specify index files glob explicitly * List files after index creation step * Use the model in current repository and not upstream t2t * Update Docker images * Expose TF Serving Rest API at 9001 * Spawn terminal from the notebooks ui, no need to go to lab --- code_search/README.md | 8 +- code_search/code-search.ipynb | 346 +++++++++++++++--- code_search/docker/t2t/Dockerfile | 4 +- code_search/docker/ui/Dockerfile | 2 +- code_search/kubeflow/app.yaml | 12 +- .../kubeflow/components/params.libsonnet | 29 +- .../README.md | 0 .../parts.yaml | 0 .../tf-serving-all-features.jsonnet | 0 .../tf-serving-with-request-log.jsonnet | 230 ++++++++++++ .../tf-serving.libsonnet | 31 +- .../util.libsonnet | 0 .../nmslib/cli/create_search_index.py | 2 +- .../src/code_search/t2t/function_docstring.py | 4 +- .../t2t/function_docstring_extended.py | 2 +- .../code_search/t2t/similarity_transformer.py | 2 +- code_search/src/requirements.txt | 2 +- 17 files changed, 570 insertions(+), 104 deletions(-) rename code_search/kubeflow/vendor/kubeflow/{tf-serving@e95f94a1a97a0974ada734895d590b5ba565fa77 => tf-serving@ab6084349673e6405ae486eb3be2141e3550643c}/README.md (100%) rename code_search/kubeflow/vendor/kubeflow/{tf-serving@e95f94a1a97a0974ada734895d590b5ba565fa77 => tf-serving@ab6084349673e6405ae486eb3be2141e3550643c}/parts.yaml (100%) rename code_search/kubeflow/vendor/kubeflow/{tf-serving@e95f94a1a97a0974ada734895d590b5ba565fa77 => tf-serving@ab6084349673e6405ae486eb3be2141e3550643c}/prototypes/tf-serving-all-features.jsonnet (100%) create mode 100644 code_search/kubeflow/vendor/kubeflow/tf-serving@ab6084349673e6405ae486eb3be2141e3550643c/prototypes/tf-serving-with-request-log.jsonnet rename code_search/kubeflow/vendor/kubeflow/{tf-serving@e95f94a1a97a0974ada734895d590b5ba565fa77 => tf-serving@ab6084349673e6405ae486eb3be2141e3550643c}/tf-serving.libsonnet (95%) rename code_search/kubeflow/vendor/kubeflow/{tf-serving@e95f94a1a97a0974ada734895d590b5ba565fa77 => tf-serving@ab6084349673e6405ae486eb3be2141e3550643c}/util.libsonnet (100%) diff --git a/code_search/README.md b/code_search/README.md index 5fb4f078..2a1c0afe 100644 --- a/code_search/README.md +++ b/code_search/README.md @@ -37,17 +37,15 @@ you replace this with the true FQDN of your Kubeflow cluster in any subsequent i * In the **Image** text field, enter `gcr.io/kubeflow-images-public/kubeflow-codelab-notebook:v20180808-v0.2-22-gcfdcb12`. This image contains all the pre-requisites needed for the demo. -* Once spawned, you should be redirected to the notebooks UI. We intend to go to the JupyterLab home - page which is available at the URL - **https://kubeflow.example.com/user//lab**. - **TIP**: Simply point the browser to **/lab** instead of the **/tree** path in the URL. - +* Once spawned, you should be redirected to the Jupyter Notebooks UI. 
+ * Spawn a new Terminal and run ``` $ git clone --branch=master --depth=1 https://github.com/kubeflow/examples ``` This will create an examples folder. It is safe to close the terminal now. -* Refresh the File Explorer (typically to the left) and navigate to `examples/code_search`. Open +* Navigate back to the Jupyter Notebooks UI and navigate to `examples/code_search`. Open the Jupyter notebook `code-search.ipynb` and follow it along. # Acknowledgements diff --git a/code_search/code-search.ipynb b/code_search/code-search.ipynb index 76236d7c..4e7d13af 100644 --- a/code_search/code-search.ipynb +++ b/code_search/code-search.ipynb @@ -21,7 +21,14 @@ "source": [ "## Install dependencies\n", "\n", - "Let us install all the Python dependencies. Note that everything must be done with `Python 2`. This will take a while and only needs to be run once." + "Let us install all the Python dependencies. Note that everything must be done with `Python 2`. This will take a while the first time." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Verify Version Information" ] }, { @@ -30,9 +37,30 @@ "metadata": {}, "outputs": [], "source": [ - "! pip2 install https://github.com/kubeflow/batch-predict/tarball/master\n", + "%%bash\n", "\n", - "! pip2 install -r src/requirements.txt" + "echo \"Pip Version Info: \" && python2 --version && python2 -m pip --version && echo\n", + "echo \"Google Cloud SDK Info: \" && gcloud --version && echo\n", + "echo \"Ksonnet Version Info: \" && ks version && echo\n", + "echo \"Kubectl Version Info: \" && kubectl version" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Install Pip Packages" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "scrolled": true + }, + "outputs": [], + "source": [ + "! python2 -m pip install -U pip" ] }, { @@ -41,8 +69,9 @@ "metadata": {}, "outputs": [], "source": [ - "# Only for BigQuery cells\n", - "! pip2 install pandas-gbq" + "# Code Search dependencies\n", + "! python2 -m pip install --user https://github.com/kubeflow/batch-predict/tarball/master\n", + "! python2 -m pip install --user -r src/requirements.txt" ] }, { @@ -51,6 +80,17 @@ "metadata": {}, "outputs": [], "source": [ + "# BigQuery Cell Dependencies\n", + "! python2 -m pip install --user pandas-gbq" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# NOTE: The RuntimeWarnings (if any) are harmless. See ContinuumIO/anaconda-issues#6678.\n", "from pandas.io import gbq" ] }, @@ -72,10 +112,8 @@ "# Configuration Variables. Modify as desired.\n", "\n", "PROJECT = 'kubeflow-dev'\n", - "CLUSTER_NAME = 'kubeflow-latest'\n", - "CLUSTER_REGION = 'us-east1-d'\n", - "CLUSTER_NAMESPACE = 'kubeflow-latest'\n", "\n", + "# Dataflow Related Variables.\n", "TARGET_DATASET = 'code_search'\n", "WORKING_DIR = 'gs://kubeflow-examples/t2t-code-search/notebook-demo'\n", "WORKER_MACHINE_TYPE = 'n1-highcpu-32'\n", @@ -83,10 +121,6 @@ "\n", "# DO NOT MODIFY. 
These are environment variables to be used in a bash shell.\n", "%env PROJECT $PROJECT\n", - "%env CLUSTER_NAME $CLUSTER_NAME\n", - "%env CLUSTER_REGION $CLUSTER_REGION\n", - "%env CLUSTER_NAMESPACE $CLUSTER_NAMESPACE\n", - "\n", "%env TARGET_DATASET $TARGET_DATASET\n", "%env WORKING_DIR $WORKING_DIR\n", "%env WORKER_MACHINE_TYPE $WORKER_MACHINE_TYPE\n", @@ -99,7 +133,7 @@ "source": [ "### Setup Authorization\n", "\n", - "In a Kubeflow cluster, we already have the key credentials available with each pod and will re-use them to authenticate. This will allow us to submit `TFJob`s and execute `Dataflow` pipelines. We also set the new context for the Code Search Ksonnet application." + "In a Kubeflow cluster on GKE, we already have the Google Application Credentials mounted onto each Pod. We can simply point `gcloud` to activate that service account." ] }, { @@ -111,13 +145,35 @@ "%%bash\n", "\n", "# Activate Service Account provided by Kubeflow.\n", - "gcloud auth activate-service-account --key-file=${GOOGLE_APPLICATION_CREDENTIALS}\n", + "gcloud auth activate-service-account --key-file=${GOOGLE_APPLICATION_CREDENTIALS}" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Additionally, to interact with the underlying cluster, we configure `kubectl`." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "%%bash\n", "\n", - "# Get KUBECONFIG for the desired cluster.\n", - "gcloud container clusters get-credentials ${CLUSTER_NAME} --region ${CLUSTER_REGION}\n", - "\n", - "# Set the namespace of the context.\n", - "kubectl config set contexts.$(kubectl config current-context).namespace ${CLUSTER_NAMESPACE}" + "kubectl config set-cluster kubeflow --server=https://kubernetes.default --certificate-authority=/var/run/secrets/kubernetes.io/serviceaccount/ca.crt\n", + "kubectl config set-credentials jupyter --token \"$(cat /var/run/secrets/kubernetes.io/serviceaccount/token)\"\n", + "kubectl config set-context kubeflow --cluster kubeflow --user jupyter\n", + "kubectl config use-context kubeflow" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Collectively, these allow us to interact with Google Cloud Services as well as the Kubernetes Cluster directly to submit `TFJob`s and execute `Dataflow` pipelines." ] }, { @@ -126,7 +182,7 @@ "source": [ "### Setup Ksonnet Application\n", "\n", - "This will use the context we've set above and provide it as a new environment to the Ksonnet application." + "We now point the Ksonnet application to the underlying Kubernetes cluster." 
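
Before wiring the ksonnet environment below, it can help to confirm that the mounted credentials actually resolve. A minimal sketch (not part of the patch; assumes the `google-auth` package present on the notebook image):

```python
# Sanity-check Application Default Credentials from inside the Pod.
# GOOGLE_APPLICATION_CREDENTIALS points at the key file mounted by Kubeflow.
import google.auth

credentials, project = google.auth.default()
print('Resolved service account credentials for project: {}'.format(project))
```

A `DefaultCredentialsError` here means the key file is not mounted where `GOOGLE_APPLICATION_CREDENTIALS` says it is, and the `gcloud` activation above will fail for the same reason.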
] }, { @@ -139,32 +195,15 @@ "\n", "cd kubeflow\n", "\n", - "# Update Ksonnet application to the context set earlier\n", - "ks env add code-search --context=$(kubectl config current-context)\n", + "# Update Ksonnet to point to the Kubernetes Cluster\n", + "ks env add code-search --context $(kubectl config current-context)\n", "\n", "# Update the Working Directory of the application\n", - "ks param set t2t-code-search workingDir ${WORKING_DIR}" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Verify Version Information" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "%%bash\n", + "sed -i'' \"s,gs://example/prefix,${WORKING_DIR},\" components/params.libsonnet\n", "\n", - "echo \"Pip Version Info: \" && pip2 --version && echo\n", - "echo \"Google Cloud SDK Info: \" && gcloud --version && echo\n", - "echo \"Ksonnet Version Info: \" && ks version && echo\n", - "echo \"Kubectl Version Info: \" && kubectl version" + "# FIXME(sanyamkapoor): This command completely replaces previous configurations.\n", + "# Hence, using string replacement in file.\n", + "# ks param set t2t-code-search workingDir ${WORKING_DIR}" ] }, { @@ -173,7 +212,9 @@ "source": [ "## View Github Files\n", "\n", - "This is the query that is run as the first step of the Pre-Processing pipeline and is sent through a set of transformations. This is illustrative of the rows being processed in the pipeline we trigger next." + "This is the query that is run as the first step of the Pre-Processing pipeline and is sent through a set of transformations. This is illustrative of the rows being processed in the pipeline we trigger next.\n", + "\n", + "**WARNING**: The table is large and the query can take a few minutes to complete." ] }, { @@ -294,7 +335,9 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "scrolled": false + }, "outputs": [], "source": [ "query = \"\"\"\n", @@ -308,13 +351,48 @@ "gbq.read_gbq(query, dialect='standard', project_id=PROJECT)" ] }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "This pipeline also writes a set of CSV files which contain function and docstring pairs delimited by a comma. Here, we list a subset of them." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "%%bash\n", + "\n", + "LIMIT=10\n", + "\n", + "gsutil ls ${WORKING_DIR}/data/*.csv | head -n ${LIMIT}" + ] + }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Prepare Dataset for Training\n", "\n", - "In this step we will use `t2t-datagen` to convert the transformed data above into the `TFRecord` format. We will run this job on the Kubeflow cluster." + "We will use `t2t-datagen` to convert the transformed data above into the `TFRecord` format.\n", + "\n", + "**TIP**: Use `ks show` to view the Resource Spec submitted." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "%%bash\n", + "\n", + "cd kubeflow\n", + "\n", + "ks show code-search -c t2t-code-search-datagen" ] }, { @@ -334,7 +412,43 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "## Execute Tensorflow Training" + "Once this job finishes, the data directory should have a vocabulary file and a list of `TFRecords` prefixed by the problem name which in our case is `github_function_docstring_extended`. Here, we list a subset of them." 
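
In addition to the `gsutil` listing that follows, a single record can be decoded to confirm the output is well-formed. An illustrative sketch (not in the patch) using the TF 1.8 file APIs this repo already relies on:

```python
import tensorflow as tf

# Decode the first serialized tf.train.Example from a training shard.
# WORKING_DIR is the configuration variable defined earlier in the notebook.
shard = tf.gfile.Glob('{}/data/*train*'.format(WORKING_DIR))[0]
record = next(tf.python_io.tf_record_iterator(shard))
example = tf.train.Example.FromString(record)
# Text2Text problems serialize 'inputs' and 'targets' feature lists.
print(example.features.feature.keys())
```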
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "%%bash\n",
+    "\n",
+    "LIMIT=10\n",
+    "\n",
+    "gsutil ls ${WORKING_DIR}/data/vocab*\n",
+    "gsutil ls ${WORKING_DIR}/data/*train* | head -n ${LIMIT}"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Execute Tensorflow Training\n",
+    "\n",
+    "Once the `TFRecords` are generated, we will use `t2t-trainer` to execute the training."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "%%bash\n",
+    "\n",
+    "cd kubeflow\n",
+    "\n",
+    "ks show code-search -c t2t-code-search-trainer"
   ]
  },
@@ -354,7 +468,40 @@
    "ks apply code-search -c t2t-code-search-trainer"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
-    "## Export Tensorflow Model"
+    "This will generate TensorFlow model checkpoints, which are illustrated below."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "%%bash\n",
+    "\n",
+    "gsutil ls ${WORKING_DIR}/output/*ckpt*"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Export Tensorflow Model\n",
+    "\n",
+    "We now use `t2t-exporter` to export the `TFModel`."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "%%bash\n",
+    "\n",
+    "cd kubeflow\n",
+    "\n",
+    "ks show code-search -c t2t-code-search-exporter"
   ]
  },
@@ -370,6 +517,24 @@
    "ks apply code-search -c t2t-code-search-exporter"
   ]
  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Once completed, this will generate a TensorFlow `SavedModel`, which we will further use for both online inference (via `TF Serving`) and offline inference (via `Kubeflow Batch Prediction`)."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "%%bash\n",
+    "\n",
+    "gsutil ls ${WORKING_DIR}/output/export/Servo"
+   ]
+  },
  {
   "cell_type": "markdown",
   "metadata": {},
@@ -398,7 +563,7 @@
   "source": [
    "### Configuration\n",
    "\n",
-    "First, select a Exported Model version from the `${WORKING_DIR}/output/export/Servo`. This should be name of a folder with UNIX Seconds Timestamp like `1533685294`. Below, we automatically do that by selecting the folder which represents the latest timestamp."
+    "First, select an Exported Model version from the `${WORKING_DIR}/output/export/Servo` as seen above. This should be the name of a folder with a UNIX seconds timestamp like `1533685294`. Below, we automatically do that by selecting the folder which represents the latest timestamp."
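
One way that selection can be written (a sketch; the notebook's own cell may differ in details):

```python
import tensorflow as tf

# Folders under export/Servo are named by UNIX-seconds timestamps, so the
# numerically largest entry is the most recent export.
export_dir = '{}/output/export/Servo'.format(WORKING_DIR)
versions = sorted(int(v.strip('/')) for v in tf.gfile.ListDirectory(export_dir))
MODEL_VERSION = str(versions[-1])
print(MODEL_VERSION)
```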
   ]
  },
  {
@@ -442,14 +607,17 @@
    "\n",
    "cd src\n",
    "\n",
+    "JOB_NAME=\"compute-function-embeddings-$(date +'%Y%m%d-%H%M%S')\"\n",
+    "PROBLEM=github_function_docstring_extended\n",
+    "\n",
     "python2 -m code_search.dataflow.cli.create_function_embeddings \\\n",
-    "    --runner DataflowRunner\n",
+    "    --runner DataflowRunner \\\n",
     "    --project \"${PROJECT}\" \\\n",
     "    --target_dataset \"${TARGET_DATASET}\" \\\n",
-    "    --problem github_function_docstring \\\n",
+    "    --problem \"${PROBLEM}\" \\\n",
     "    --data_dir \"${WORKING_DIR}/data\" \\\n",
     "    --saved_model_dir \"${WORKING_DIR}/output/export/Servo/${MODEL_VERSION}\" \\\n",
-    "    --job_name compute-function-embeddings\n",
+    "    --job_name \"${JOB_NAME}\" \\\n",
     "    --temp_location \"${WORKING_DIR}/dataflow/temp\" \\\n",
     "    --staging_location \"${WORKING_DIR}/dataflow/staging\" \\\n",
     "    --worker_machine_type \"${WORKER_MACHINE_TYPE}\" \\\n",
@@ -480,13 +648,46 @@
    "gbq.read_gbq(query, dialect='standard', project_id=PROJECT)"
   ]
  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "The pipeline also generates a set of CSV files which will be used to generate the search index."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "%%bash\n",
+    "\n",
+    "LIMIT=10\n",
+    "\n",
+    "gsutil ls ${WORKING_DIR}/data/*index*.csv | head -n ${LIMIT}"
+   ]
+  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Create Search Index\n",
    "\n",
-    "We now create the Search Index from the computed embeddings so that during a query we can do a k-Nearest Neighbor search to give out semantically similar results."
+    "We now create the Search Index from the computed embeddings. This facilitates a k-Nearest Neighbor search for semantically similar results."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "%%bash\n",
+    "\n",
+    "cd kubeflow\n",
+    "\n",
+    "ks show code-search -c search-index-creator"
   ]
  },
@@ -506,7 +707,18 @@
   "cell_type": "markdown",
   "metadata": {},
   "source": [
-    "Using the CSV files generated from the previous step, this creates an index using [NMSLib](https://github.com/nmslib/nmslib). A unified CSV file containing all the code examples for a human-readable reverse lookup during the query, is also created in the `WORKING_DIR`."
+    "Using the CSV files generated from the previous step, this creates an index using [NMSLib](https://github.com/nmslib/nmslib). A unified CSV file containing all the code examples for a human-readable reverse lookup during the query is also created."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "%%bash\n",
+    "\n",
+    "gsutil ls ${WORKING_DIR}/code_search_index*"
   ]
  },
@@ -518,6 +730,19 @@
    "We've seen offline inference during the computation of embeddings. For online inference, we deploy the exported Tensorflow model above using [Tensorflow Serving](https://www.tensorflow.org/serving/)."
   ]
  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "%%bash\n",
+    "\n",
+    "cd kubeflow\n",
+    "\n",
+    "ks show code-search -c t2t-code-search-serving"
+   ]
+  },
  {
   "cell_type": "code",
   "execution_count": null,
@@ -540,6 +765,19 @@
    "We finally deploy the Search UI which allows the user to input arbitrary strings and see a list of results corresponding to semantically similar Python functions. This internally uses the inference server we just deployed."
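
Because the serving component now exposes the TF Serving REST API on port 9001 (see `servingUrl` in `params.libsonnet`), it can be smoke-tested from any Pod in the cluster. A hedged sketch: the exact `instances` payload depends on the exported model's serving signature, so the body below is only a placeholder:

```python
import requests

# In-cluster smoke test of the TF Serving REST endpoint (placeholder payload).
url = 'http://t2t-code-search.kubeflow:9001/v1/models/t2t-code-search:predict'
payload = {'instances': [{'input': {'b64': '...'}}]}  # '...' intentionally left unfilled
response = requests.post(url, json=payload)
print(response.status_code, response.text[:200])
```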
] }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "%%bash\n", + "\n", + "cd kubeflow\n", + "\n", + "ks show code-search -c search-index-server" + ] + }, { "cell_type": "code", "execution_count": null, diff --git a/code_search/docker/t2t/Dockerfile b/code_search/docker/t2t/Dockerfile index ca6d1acc..141e5700 100644 --- a/code_search/docker/t2t/Dockerfile +++ b/code_search/docker/t2t/Dockerfile @@ -2,7 +2,7 @@ ARG BASE_IMAGE_TAG=1.8.0 FROM tensorflow/tensorflow:$BASE_IMAGE_TAG -RUN pip --no-cache-dir install tensor2tensor~=1.6.0 oauth2client~=4.1.0 &&\ +RUN pip --no-cache-dir install tensor2tensor~=1.7.0 oauth2client~=4.1.0 &&\ apt-get update && apt-get install -y jq &&\ rm -rf /var/lib/apt/lists/* @@ -15,6 +15,4 @@ ENV PYTHONIOENCODING=utf-8 T2T_USR_DIR=/app/code_search/t2t VOLUME ["/data", "/output"] -EXPOSE 8008 - ENTRYPOINT ["bash"] diff --git a/code_search/docker/ui/Dockerfile b/code_search/docker/ui/Dockerfile index ca8820f5..1adb85ab 100644 --- a/code_search/docker/ui/Dockerfile +++ b/code_search/docker/ui/Dockerfile @@ -9,7 +9,7 @@ RUN curl -sL https://deb.nodesource.com/setup_10.x | bash - &&\ numpy~=1.14.0 \ oauth2client~=4.1.0 \ requests~=2.18.0 \ - tensor2tensor~=1.6.0 &&\ + tensor2tensor~=1.7.0 &&\ rm -rf /var/lib/apt/lists/* ADD src/ /src diff --git a/code_search/kubeflow/app.yaml b/code_search/kubeflow/app.yaml index 66e69805..9bc420bd 100644 --- a/code_search/kubeflow/app.yaml +++ b/code_search/kubeflow/app.yaml @@ -1,17 +1,17 @@ apiVersion: 0.2.0 environments: - kf-cs: + default: destination: namespace: kubeflow - server: https://35.232.164.190 - k8sVersion: v1.9.6 - path: kf-cs + server: https://35.237.202.148 + k8sVersion: v1.9.7 + path: default kind: ksonnet.io/app libraries: tf-serving: name: tf-serving registry: kubeflow - version: e95f94a1a97a0974ada734895d590b5ba565fa77 + version: ab6084349673e6405ae486eb3be2141e3550643c name: kubeflow registries: incubator: @@ -19,5 +19,5 @@ registries: uri: github.com/ksonnet/parts/tree/master/incubator kubeflow: protocol: github - uri: https://github.com/kubeflow/kubeflow/tree/v0.2.2/kubeflow + uri: https://github.com/kubeflow/kubeflow/tree/master/kubeflow version: 0.0.1 diff --git a/code_search/kubeflow/components/params.libsonnet b/code_search/kubeflow/components/params.libsonnet index 41e39b34..da88e46d 100644 --- a/code_search/kubeflow/components/params.libsonnet +++ b/code_search/kubeflow/components/params.libsonnet @@ -9,8 +9,8 @@ numPsGpu: 0, train_steps: 100, eval_steps: 10, - image: 'gcr.io/kubeflow-dev/code-search:v20180814-66d27b9', - imageGpu: 'gcr.io/kubeflow-dev/code-search:v20180814-66d27b9-gpu', + image: 'gcr.io/kubeflow-dev/code-search:v20180817-732333a', + imageGpu: 'gcr.io/kubeflow-dev/code-search:v20180817-732333a-gpu', imagePullSecrets: [], dataDir: 'null', outputDir: 'null', @@ -18,34 +18,35 @@ hparams_set: 'null', }, "t2t-code-search": { + name: 't2t-code-search', workingDir: 'gs://example/prefix', - problem: 'github_function_docstring_extended', - model: 'similarity_transformer', + problem: 'cs_github_function_docstring', + model: 'cs_similarity_transformer', hparams_set: 'transformer_tiny', }, "t2t-code-search-datagen": { jobType: 'datagen', name: 't2t-code-search-datagen', - problem: $.components["t2t-code-search"].problem, + problem: $.components['t2t-code-search'].problem, dataDir: $.components['t2t-code-search'].workingDir + '/data', }, "t2t-code-search-trainer": { jobType: 'trainer', name: 't2t-code-search-trainer', - problem: 
$.components["t2t-code-search"].problem, + problem: $.components['t2t-code-search'].problem, dataDir: $.components['t2t-code-search'].workingDir + '/data', outputDir: $.components['t2t-code-search'].workingDir + '/output', - model: $.components["t2t-code-search"].model, - hparams_set: $.components["t2t-code-search"].hparams_set, + model: $.components['t2t-code-search'].model, + hparams_set: $.components['t2t-code-search']['hparams_set'], }, "t2t-code-search-exporter": { jobType: 'exporter', name: 't2t-code-search-exporter', - problem: $.components["t2t-code-search"].problem, + problem: $.components['t2t-code-search'].problem, dataDir: $.components['t2t-code-search'].workingDir + '/data', outputDir: $.components['t2t-code-search'].workingDir + '/output', - model: $.components["t2t-code-search"].model, - hparams_set: $.components["t2t-code-search"].hparams_set, + model: $.components['t2t-code-search'].model, + hparams_set: $.components['t2t-code-search']['hparams_set'], }, "t2t-code-search-serving": { name: 't2t-code-search', @@ -57,7 +58,7 @@ }, nmslib: { replicas: 1, - image: 'gcr.io/kubeflow-dev/code-search-ui:v20180806-7b0fcaa', + image: 'gcr.io/kubeflow-dev/code-search-ui:v20180817-0d4a60d', problem: 'null', dataDir: 'null', lookupFile: 'null', @@ -72,11 +73,11 @@ }, "search-index-server": { name: 'search-index-server', - problem: $.components["t2t-code-search"].problem, + problem: $.components['t2t-code-search'].problem, dataDir: $.components['t2t-code-search'].workingDir + '/data', lookupFile: $.components['t2t-code-search'].workingDir + '/code_search_index.csv', indexFile: $.components['t2t-code-search'].workingDir + '/code_search_index.nmslib', - servingUrl: 'http://t2t-code-search.kubeflow:8000/v1/models/t2t-code-search:predict', + servingUrl: 'http://t2t-code-search.kubeflow:9001/v1/models/t2t-code-search:predict', }, }, } diff --git a/code_search/kubeflow/vendor/kubeflow/tf-serving@e95f94a1a97a0974ada734895d590b5ba565fa77/README.md b/code_search/kubeflow/vendor/kubeflow/tf-serving@ab6084349673e6405ae486eb3be2141e3550643c/README.md similarity index 100% rename from code_search/kubeflow/vendor/kubeflow/tf-serving@e95f94a1a97a0974ada734895d590b5ba565fa77/README.md rename to code_search/kubeflow/vendor/kubeflow/tf-serving@ab6084349673e6405ae486eb3be2141e3550643c/README.md diff --git a/code_search/kubeflow/vendor/kubeflow/tf-serving@e95f94a1a97a0974ada734895d590b5ba565fa77/parts.yaml b/code_search/kubeflow/vendor/kubeflow/tf-serving@ab6084349673e6405ae486eb3be2141e3550643c/parts.yaml similarity index 100% rename from code_search/kubeflow/vendor/kubeflow/tf-serving@e95f94a1a97a0974ada734895d590b5ba565fa77/parts.yaml rename to code_search/kubeflow/vendor/kubeflow/tf-serving@ab6084349673e6405ae486eb3be2141e3550643c/parts.yaml diff --git a/code_search/kubeflow/vendor/kubeflow/tf-serving@e95f94a1a97a0974ada734895d590b5ba565fa77/prototypes/tf-serving-all-features.jsonnet b/code_search/kubeflow/vendor/kubeflow/tf-serving@ab6084349673e6405ae486eb3be2141e3550643c/prototypes/tf-serving-all-features.jsonnet similarity index 100% rename from code_search/kubeflow/vendor/kubeflow/tf-serving@e95f94a1a97a0974ada734895d590b5ba565fa77/prototypes/tf-serving-all-features.jsonnet rename to code_search/kubeflow/vendor/kubeflow/tf-serving@ab6084349673e6405ae486eb3be2141e3550643c/prototypes/tf-serving-all-features.jsonnet diff --git a/code_search/kubeflow/vendor/kubeflow/tf-serving@ab6084349673e6405ae486eb3be2141e3550643c/prototypes/tf-serving-with-request-log.jsonnet 
b/code_search/kubeflow/vendor/kubeflow/tf-serving@ab6084349673e6405ae486eb3be2141e3550643c/prototypes/tf-serving-with-request-log.jsonnet
new file mode 100644
index 00000000..44ba455a
--- /dev/null
+++ b/code_search/kubeflow/vendor/kubeflow/tf-serving@ab6084349673e6405ae486eb3be2141e3550643c/prototypes/tf-serving-with-request-log.jsonnet
@@ -0,0 +1,230 @@
+// @apiVersion 0.1
+// @name io.ksonnet.pkg.tf-serving-request-log
+// @description tf-serving with request logging
+// @shortDescription tf-serving with request logging
+// @param name string Name to give to each of the components
+// @param gcpProject string The gcp project for Bigquery dataset
+// @param dataset string The Bigquery dataset
+// @param table string The Bigquery table
+// @optionalParam modelBasePath string gs://kubeflow-examples-data/mnist The model path
+// @optionalParam modelName string mnist The model name
+
+local k = import "k.libsonnet";
+
+local namespace = "kubeflow";
+local appName = import "param://name";
+local image = "gcr.io/kubeflow-images-public/tf-model-server-cpu:v20180327-995786ec";
+local httpProxyImage = "gcr.io/kubeflow-images-public/tf-model-server-http-proxy:v20180723";
+local loggingImage = "gcr.io/kubeflow-images-public/tf-model-server-request-logger:v20180723";
+
+local gcpSecretName = "user-gcp-sa";
+
+local service = {
+  apiVersion: "v1",
+  kind: "Service",
+  metadata: {
+    labels: {
+      app: appName,
+    },
+    name: appName,
+    namespace: namespace,
+  },
+  spec: {
+    ports: [
+      {
+        name: "grpc-tf-serving",
+        port: 9000,
+        targetPort: 9000,
+      },
+      {
+        name: "http-tf-serving-proxy",
+        port: 8000,
+        targetPort: 8000,
+      },
+    ],
+    selector: {
+      app: appName,
+    },
+    type: "ClusterIP",
+  },
+};
+
+local configMap = {
+  apiVersion: "v1",
+  kind: "ConfigMap",
+  metadata: {
+    name: appName + "fluentd-config",
+    namespace: namespace,
+  },
+  data: {
+    "fluent.conf": std.format(|||
+      <source>
+        @type tail
+        path /tmp/logs/request.log
+        pos_file /tmp/logs/request.log.pos
+        <parse>
+          @type json
+        </parse>
+        tag dummy
+      </source>
+      <match dummy>
+        @type bigquery_insert
+        auth_method application_default
+        project %s
+        dataset %s
+        table %s
+        fetch_schema true
+      </match>
+    |||, [params.gcpProject, params.dataset, params.table]),
+  },
+};
+
+local deployment = {
+  apiVersion: "extensions/v1beta1",
+  kind: "Deployment",
+  metadata: {
+    labels: {
+      app: appName,
+    },
+    name: appName,
+    namespace: namespace,
+  },
+  spec: {
+    template: {
+      metadata: {
+        labels: {
+          app: appName,
+        },
+      },
+      spec: {
+        containers: [
+          // ModelServer
+          {
+            args: [
+              "/usr/bin/tensorflow_model_server",
+              "--port=9000",
+              "--model_name=" + params.modelName,
+              "--model_base_path=" + params.modelBasePath,
+            ],
+            image: image,
+            imagePullPolicy: "IfNotPresent",
+            name: "model-server",
+            ports: [
+              {
+                containerPort: 9000,
+              },
+            ],
+            resources: {
+              limits: {
+                cpu: "4",
+                memory: "4Gi",
+              },
+              requests: {
+                cpu: "1",
+                memory: "1Gi",
+              },
+            },
+          },
+          // Http proxy
+          {
+            name: "http-proxy",
+            image: httpProxyImage,
+            imagePullPolicy: "Always",
+            command: [
+              "python",
+              "/usr/src/app/server.py",
+              "--port=8000",
+              "--rpc_port=9000",
+              "--rpc_timeout=10.0",
+              "--log_request=true",
+            ],
+            env: [],
+            ports: [
+              {
+                containerPort: 8000,
+              },
+            ],
+            resources: {
+              requests: {
+                memory: "1Gi",
+                cpu: "1",
+              },
+              limits: {
+                memory: "4Gi",
+                cpu: "4",
+              },
+            },
+            securityContext: {
+              runAsUser: 1000,
+              fsGroup: 1000,
+            },
+            volumeMounts: [
+              {
+                name: "request-logs",
+                mountPath: "/tmp/logs",
+              },
+            ],
+          },
+          // TODO(lunkai): use admission controller to inject.
+          // Logging container.
+ { + name: "logging", + image: loggingImage, + imagePullPolicy: "Always", + env: [ + { name: "GOOGLE_APPLICATION_CREDENTIALS", value: "/secret/gcp-credentials/key.json" }, + ], + resources: { + requests: { + memory: "250Mi", + cpu: "0.25", + }, + limits: { + memory: "500Mi", + cpu: "0.5", + }, + }, + volumeMounts: [ + { + name: "request-logs", + mountPath: "/tmp/logs", + }, + { + name: "gcp-credentials", + mountPath: "/secret/gcp-credentials", + }, + { + name: "fluentd-config-volume", + mountPath: "/fluentd/etc/custom", + }, + ], + }, + ], + volumes: [ + { + name: "gcp-credentials", + secret: { + secretName: gcpSecretName, + }, + }, + { + name: "request-logs", + emptyDir: {}, + }, + { + configMap: { + name: "fluentd-config", + }, + name: "fluentd-config-volume", + }, + ], + }, + }, + }, +}; + +k.core.v1.list.new([ + service, + deployment, + configMap, +]) diff --git a/code_search/kubeflow/vendor/kubeflow/tf-serving@e95f94a1a97a0974ada734895d590b5ba565fa77/tf-serving.libsonnet b/code_search/kubeflow/vendor/kubeflow/tf-serving@ab6084349673e6405ae486eb3be2141e3550643c/tf-serving.libsonnet similarity index 95% rename from code_search/kubeflow/vendor/kubeflow/tf-serving@e95f94a1a97a0974ada734895d590b5ba565fa77/tf-serving.libsonnet rename to code_search/kubeflow/vendor/kubeflow/tf-serving@ab6084349673e6405ae486eb3be2141e3550643c/tf-serving.libsonnet index 6a1561be..0de66d8d 100644 --- a/code_search/kubeflow/vendor/kubeflow/tf-serving@e95f94a1a97a0974ada734895d590b5ba565fa77/tf-serving.libsonnet +++ b/code_search/kubeflow/vendor/kubeflow/tf-serving@ab6084349673e6405ae486eb3be2141e3550643c/tf-serving.libsonnet @@ -18,12 +18,7 @@ deployIstio: false, deployHttpProxy: false, - defaultHttpProxyImage: "gcr.io/kubeflow-images-public/tf-model-server-http-proxy:v20180606-9dfda4f2", - httpProxyImage: "", - httpProxyImageToUse: if $.params.httpProxyImage == "" then - $.params.defaultHttpProxyImage - else - $.params.httpProxyImage, + httpProxyImage: "gcr.io/kubeflow-images-public/tf-model-server-http-proxy:v20180606-9dfda4f2", serviceType: "ClusterIP", @@ -57,10 +52,10 @@ // Name of the k8s secrets containing S3 credentials s3SecretName: "", // Name of the key in the k8s secret containing AWS_ACCESS_KEY_ID. - s3SecretAccesskeyidKeyName: "", + s3SecretAccesskeyidKeyName: "AWS_ACCESS_KEY_ID", // Name of the key in the k8s secret containing AWS_SECRET_ACCESS_KEY. - s3SecretSecretaccesskeyKeyName: "", + s3SecretSecretaccesskeyKeyName: "AWS_SECRET_ACCESS_KEY", // S3 region s3AwsRegion: "us-west-1", @@ -122,7 +117,7 @@ args: [ "/usr/bin/tensorflow_model_server", "--port=9000", - "--rest_api_port=8000", + "--rest_api_port=9001", "--model_name=" + $.params.modelName, "--model_base_path=" + $.params.modelPath, ], @@ -130,6 +125,9 @@ { containerPort: 9000, }, + { + containerPort: 9001, + }, ], // TODO(jlewi): We should add readiness and liveness probes. I think the blocker is that // model-server doesn't have something we can use out of the box. 
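
An aside on the request-log prototype vendored above: the fluentd sidecar tails `/tmp/logs/request.log` into BigQuery, so once prediction traffic flows the sink can be inspected with the same `pandas-gbq` helper the notebook uses. The dataset and table below are hypothetical stand-ins for whatever was passed as the prototype's `dataset` and `table` parameters:

```python
from pandas.io import gbq

# Peek at a few logged serving requests (hypothetical dataset/table names).
query = """
SELECT * FROM `serving_logs.request_log` LIMIT 5
"""
gbq.read_gbq(query, dialect='standard', project_id=PROJECT)
```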
@@ -176,7 +174,7 @@ httpProxyContainer:: { name: $.params.name + "-http-proxy", - image: $.params.httpProxyImageToUse, + image: $.params.httpProxyImage, imagePullPolicy: "IfNotPresent", command: [ "python", @@ -193,12 +191,12 @@ ], resources: { requests: { - memory: "1Gi", - cpu: "1", + memory: "500Mi", + cpu: "0.5", }, limits: { - memory: "4Gi", - cpu: "4", + memory: "1Gi", + cpu: "1", }, }, securityContext: { @@ -274,6 +272,11 @@ port: 9000, targetPort: 9000, }, + { + name: "rest-tf-serving", + port: 9001, + targetPort: 9001, + }, { name: "http-tf-serving-proxy", port: 8000, diff --git a/code_search/kubeflow/vendor/kubeflow/tf-serving@e95f94a1a97a0974ada734895d590b5ba565fa77/util.libsonnet b/code_search/kubeflow/vendor/kubeflow/tf-serving@ab6084349673e6405ae486eb3be2141e3550643c/util.libsonnet similarity index 100% rename from code_search/kubeflow/vendor/kubeflow/tf-serving@e95f94a1a97a0974ada734895d590b5ba565fa77/util.libsonnet rename to code_search/kubeflow/vendor/kubeflow/tf-serving@ab6084349673e6405ae486eb3be2141e3550643c/util.libsonnet diff --git a/code_search/src/code_search/nmslib/cli/create_search_index.py b/code_search/src/code_search/nmslib/cli/create_search_index.py index 0df5c03b..ddb26520 100644 --- a/code_search/src/code_search/nmslib/cli/create_search_index.py +++ b/code_search/src/code_search/nmslib/cli/create_search_index.py @@ -33,7 +33,7 @@ def create_search_index(argv=None): with open(tmp_lookup_file, 'w') as lookup_file: lookup_writer = csv.writer(lookup_file) - for csv_file_path in tf.gfile.Glob('{}/*.csv'.format(args.data_dir)): + for csv_file_path in tf.gfile.Glob('{}/*index*.csv'.format(args.data_dir)): tf.logging.debug('Reading {}'.format(csv_file_path)) with tf.gfile.Open(csv_file_path) as csv_file: diff --git a/code_search/src/code_search/t2t/function_docstring.py b/code_search/src/code_search/t2t/function_docstring.py index a9c3a175..0fcc3ef4 100644 --- a/code_search/src/code_search/t2t/function_docstring.py +++ b/code_search/src/code_search/t2t/function_docstring.py @@ -4,11 +4,9 @@ from six import StringIO from tensor2tensor.data_generators import generator_utils from tensor2tensor.data_generators import text_problems from tensor2tensor.utils import metrics -from tensor2tensor.utils import registry import tensorflow as tf -@registry.register_problem class GithubFunctionDocstring(text_problems.Text2TextProblem): """Function and Docstring similarity Problem. @@ -67,7 +65,7 @@ class GithubFunctionDocstring(text_problems.Text2TextProblem): @property def max_samples_for_vocab(self): # FIXME(sanyamkapoor): This exists to handle memory explosion. - return int(3.5e5) + return int(2e5) def get_csv_files(self, _data_dir, tmp_dir, _dataset_split): return [ diff --git a/code_search/src/code_search/t2t/function_docstring_extended.py b/code_search/src/code_search/t2t/function_docstring_extended.py index 460647ca..8cf20119 100644 --- a/code_search/src/code_search/t2t/function_docstring_extended.py +++ b/code_search/src/code_search/t2t/function_docstring_extended.py @@ -2,7 +2,7 @@ from tensor2tensor.utils import registry from .function_docstring import GithubFunctionDocstring -@registry.register_problem +@registry.register_problem('cs_github_function_docstring') class GithubFunctionDocstringExtended(GithubFunctionDocstring): """Function Docstring problem with extended semantics. 
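
Note that the problem and model above are re-registered under explicit names (`cs_github_function_docstring`, `cs_similarity_transformer`) because tensor2tensor 1.7 ships conflicting upstream types, per the commit message. Lookup then happens by registry key. A minimal sketch, assuming the `code_search` package is importable (the Docker image arranges this via `T2T_USR_DIR` for the t2t CLIs):

```python
from tensor2tensor.utils import registry

# Importing the modules executes their @registry.register_* decorators.
import code_search.t2t.function_docstring_extended  # noqa: F401
import code_search.t2t.similarity_transformer  # noqa: F401

problem = registry.problem('cs_github_function_docstring')
model_cls = registry.model('cs_similarity_transformer')
print(type(problem).__name__, model_cls.__name__)
```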
diff --git a/code_search/src/code_search/t2t/similarity_transformer.py b/code_search/src/code_search/t2t/similarity_transformer.py index 9a22be31..72e430ea 100644 --- a/code_search/src/code_search/t2t/similarity_transformer.py +++ b/code_search/src/code_search/t2t/similarity_transformer.py @@ -7,7 +7,7 @@ from tensor2tensor.utils import t2t_model import tensorflow as tf -@registry.register_model +@registry.register_model('cs_similarity_transformer') class SimilarityTransformer(t2t_model.T2TModel): """Transformer Model for Similarity between two strings. diff --git a/code_search/src/requirements.txt b/code_search/src/requirements.txt index 6b8fc458..91271d0d 100644 --- a/code_search/src/requirements.txt +++ b/code_search/src/requirements.txt @@ -7,5 +7,5 @@ numpy~=1.14.0 oauth2client~=4.1.0 requests~=2.18.0 spacy~=2.0.0 -tensor2tensor~=1.6.0 +tensor2tensor~=1.7.0 tensorflow~=1.8.0
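
With the Docker images and `requirements.txt` all moving to `tensor2tensor~=1.7.0`, a stale install is the most likely source of registry conflicts, so a quick environment check is worthwhile. An illustrative snippet, not part of the patch:

```python
import pkg_resources

# Confirm the pinned tensor2tensor release is the one actually installed.
version = pkg_resources.get_distribution('tensor2tensor').version
assert version.startswith('1.7'), 'expected 1.7.x, found {}'.format(version)
print('tensor2tensor {}'.format(version))
```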