From 4bd30a1e68739391626ff6d9fa54838f62f032fc Mon Sep 17 00:00:00 2001
From: Sanyam Kapoor
Date: Fri, 15 Jun 2018 18:16:34 -0700
Subject: [PATCH] Language task on kubeflow (#143)

* [WIP] initialize ksonnet app
* Push images to GCR
* Upgrade Docker container to run T2T entrypoint with appropriate env vars
* Add a tf-job based t2t-job
* Fix GPU parameters
---
 code_search/README.md                         | 65 +++++++++----
 code_search/kubeflow/.gitignore               |  5 +
 code_search/kubeflow/app.yaml                 | 31 +++++++
 .../kubeflow/components/params.libsonnet      | 33 +++++++
 .../components/t2t-gh-summarizer.jsonnet      |  7 ++
 .../kubeflow/components/t2t-job.libsonnet     | 67 ++++++++++++++
 .../kubeflow/environments/base.libsonnet      |  4 +
 .../kubeflow/vendor/kubeflow/tf-job/README.md | 91 +++++++++++++++++++
 .../vendor/kubeflow/tf-job/parts.yaml         | 35 +++++++
 .../kubeflow/tf-job/prototypes/tf-job.jsonnet | 65 +++++++++++++
 .../vendor/kubeflow/tf-job/tf-job.libsonnet   | 59 ++++++++++++
 .../vendor/kubeflow/tf-job/util.libsonnet     |  7 ++
 code_search/language_task/Dockerfile          |  9 +-
 code_search/language_task/build_image.sh      |  9 +-
 code_search/language_task/run.sh              | 36 --------
 code_search/language_task/t2t-entrypoint.sh   | 29 ++++++
 16 files changed, 496 insertions(+), 56 deletions(-)
 create mode 100644 code_search/kubeflow/.gitignore
 create mode 100644 code_search/kubeflow/app.yaml
 create mode 100644 code_search/kubeflow/components/params.libsonnet
 create mode 100644 code_search/kubeflow/components/t2t-gh-summarizer.jsonnet
 create mode 100644 code_search/kubeflow/components/t2t-job.libsonnet
 create mode 100644 code_search/kubeflow/environments/base.libsonnet
 create mode 100644 code_search/kubeflow/vendor/kubeflow/tf-job/README.md
 create mode 100644 code_search/kubeflow/vendor/kubeflow/tf-job/parts.yaml
 create mode 100644 code_search/kubeflow/vendor/kubeflow/tf-job/prototypes/tf-job.jsonnet
 create mode 100644 code_search/kubeflow/vendor/kubeflow/tf-job/tf-job.libsonnet
 create mode 100644 code_search/kubeflow/vendor/kubeflow/tf-job/util.libsonnet
 delete mode 100755 code_search/language_task/run.sh
 create mode 100755 code_search/language_task/t2t-entrypoint.sh

diff --git a/code_search/README.md b/code_search/README.md
index ac8c652f..9bab5763 100644
--- a/code_search/README.md
+++ b/code_search/README.md
@@ -72,53 +72,82 @@ $ python preprocess/scripts/process_github_archive.py -i files/select_github_arc

 ## 2. Model Training

 A `Dockerfile` based on Tensorflow is provided which has all the dependencies for this part of the pipeline.
-By default, it is based off Tensorflow CPU 1.8.0 for `Python3` but can be overridden in the Docker image build using
-the following command
+By default, it is based off Tensorflow CPU 1.8.0 for `Python3` but can be overridden in the Docker image build.
+The script below builds the Docker image and optionally pushes it to Google Container Registry.

+### 2.1 Build & Push images to GCR
+
+**NOTE**: The images can be pushed to any registry of choice, but the rest of this guide assumes GCR.
+
+* Authenticate with GCR
 ```
-$ export BUILD_IMAGE_TAG=my-new-tag # (optional) to change built image tag
+$ gcloud auth configure-docker
+```
+
+* Setup environment variables
+```
+$ export PROJECT= # (optional) GCP project ID; if not set, the image is not pushed to GCR
+$ export BUILD_IMAGE_TAG=code-search:devel # (optional) to change built image tag
 $ export BASE_IMAGE_TAG=1.8.0-gpu-py3 # (optional) for GPU base image
+```
+
+* Build and push the image
+```
 $ ./language_task/build_image.sh
 ```

-### 2.1 Function Summarizer
+See [GCR Pushing and Pulling Images](https://cloud.google.com/container-registry/docs/pushing-and-pulling) for more.
+
+
+### 2.2 Train Locally
+
+**WARNING**: The container might run out of memory and be killed.
+
+#### 2.2.1 Function Summarizer
 This part generates a model to summarize functions into docstrings using the data generated in the previous step. It uses `tensor2tensor`.

 * Generate `TFRecords` for training
 ```
-$ export MOUNT_DATA_DIR=/path/to/data/folder # (optional) mount a local data directory
-$ export DOCKER_ENTRYPOINT=t2t-datagen # (required)
-$ ./language_task/run.sh --problem=github_function_summarizer
+$ export MOUNT_DATA_DIR=/path/to/data/folder
+$ docker run --rm -it -v ${MOUNT_DATA_DIR}:/data ${BUILD_IMAGE_TAG} \
+    t2t-datagen --problem=github_function_summarizer --data_dir=/data
 ```

 * Train transduction model using `Transformer Networks` and a base hyper-parameter set
 ```
-$ export MOUNT_DATA_DIR=/path/to/data/folder # (optional) mount a local data directory
-$ export DOCKER_ENTRYPOINT=t2t-trainer # (required)
-$ ./language_task/run.sh --problem=github_function_summarizer --model=transformer --hparams_set=transformer_base
+$ export MOUNT_DATA_DIR=/path/to/data/folder
+$ export MOUNT_OUTPUT_DIR=/path/to/output/folder
+$ docker run --rm -it -v ${MOUNT_DATA_DIR}:/data -v ${MOUNT_OUTPUT_DIR}:/output ${BUILD_IMAGE_TAG} \
+    t2t-trainer --problem=github_function_summarizer --data_dir=/data --output_dir=/output \
+    --model=transformer --hparams_set=transformer_base
 ```

-### 2.2 Docstrings Language Model
+#### 2.2.2 Docstrings Language Model
 This part trains a language model based on the docstrings in the dataset and uses `tensor2tensor`.

 * Generate `TFRecords` for training
 ```
-$ export MOUNT_DATA_DIR=/path/to/data/folder # (optional) mount a local data directory
-$ export DOCKER_ENTRYPOINT=t2t-datagen # (required)
-$ ./language_task/run.sh --problem=github_docstring_language_model
+$ export MOUNT_DATA_DIR=/path/to/data/folder
+$ docker run --rm -it -v ${MOUNT_DATA_DIR}:/data ${BUILD_IMAGE_TAG} \
+    t2t-datagen --problem=github_docstring_language_model --data_dir=/data
 ```

 * Train language model using `Transformer Networks` and a custom hyper-parameter set
 ```
-$ export MOUNT_DATA_DIR=/path/to/data/folder # (optional) mount a local data directory
-$ export MOUNT_OUTPUT_DIR=/path/to/output/folder # (optional) mount a local output directory
-$ export DOCKER_ENTRYPOINT=t2t-trainer # (required)
-$ ./language_task/run.sh --problem=github_docstring_language_model --model=transformer --hparams_set=transformer_gh_lm
+$ export MOUNT_DATA_DIR=/path/to/data/folder
+$ export MOUNT_OUTPUT_DIR=/path/to/output/folder
+$ docker run --rm -it -v ${MOUNT_DATA_DIR}:/data -v ${MOUNT_OUTPUT_DIR}:/output ${BUILD_IMAGE_TAG} \
+    t2t-trainer --problem=github_docstring_language_model --data_dir=/data --output_dir=/output \
+    --model=transformer --hparams_set=transformer_gh_lm
 ```

+### 2.3 Train on Kubeflow
+
+TODO
+
 # Acknowledgements

 This project derives from [hamelsmu/code_search](https://github.com/hamelsmu/code_search).
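Note: section `2.3 Train on Kubeflow` above is still a TODO in this patch. As a rough sketch only, launching the `t2t-gh-summarizer` component added below could look like the following (this assumes a cluster with Kubeflow's TFJob operator installed and the `default` environment from `app.yaml`; the GCS bucket is a placeholder):

```
$ cd code_search/kubeflow
$ ks param set t2t-gh-summarizer dataDir gs://<your-bucket>/code-search/raw_data
$ ks param set t2t-gh-summarizer outputDir gs://<your-bucket>/code-search/train
$ ks apply default -c t2t-gh-summarizer
```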
diff --git a/code_search/kubeflow/.gitignore b/code_search/kubeflow/.gitignore
new file mode 100644
index 00000000..0e218529
--- /dev/null
+++ b/code_search/kubeflow/.gitignore
@@ -0,0 +1,5 @@
+/lib
+/.ksonnet/registries
+/app.override.yaml
+/.ks_environment
+/environments
diff --git a/code_search/kubeflow/app.yaml b/code_search/kubeflow/app.yaml
new file mode 100644
index 00000000..e59f772c
--- /dev/null
+++ b/code_search/kubeflow/app.yaml
@@ -0,0 +1,31 @@
+apiVersion: 0.1.0
+environments:
+  default:
+    destination:
+      namespace: kubeflow
+      server: https://130.211.225.204
+    k8sVersion: v1.9.6
+    path: default
+kind: ksonnet.io/app
+libraries:
+  tf-job:
+    gitVersion:
+      commitSha: d8e19a4762406bb454453331f52ed5a4433c0df9
+      refSpec: master
+    name: tf-job
+    registry: kubeflow
+name: kubeflow
+registries:
+  incubator:
+    gitVersion:
+      commitSha: 40285d8a14f1ac5787e405e1023cf0c07f6aa28c
+      refSpec: master
+    protocol: github
+    uri: github.com/ksonnet/parts/tree/master/incubator
+  kubeflow:
+    gitVersion:
+      commitSha: d8e19a4762406bb454453331f52ed5a4433c0df9
+      refSpec: master
+    protocol: github
+    uri: github.com/kubeflow/kubeflow/tree/master/kubeflow
+version: 0.0.1
diff --git a/code_search/kubeflow/components/params.libsonnet b/code_search/kubeflow/components/params.libsonnet
new file mode 100644
index 00000000..324a48cc
--- /dev/null
+++ b/code_search/kubeflow/components/params.libsonnet
@@ -0,0 +1,33 @@
+{
+  global: {
+    // User-defined global parameters; accessible to all component and environments, Ex:
+    // replicas: 4,
+  },
+  components: {
+    // Component-level parameters, defined initially from 'ks prototype use ...'
+    // Each object below should correspond to a component in the components/ directory
+    "t2t-job": {
+      numWorker: 1,
+      numMaster: 1,
+      numPs: 1,
+      numWorkerGpu: 0,
+      numPsGpu: 0,
+
+      train_steps: 100,
+      eval_steps: 10,
+
+      image: "gcr.io/kubeflow-dev/code-search:devel",
+      imageGpu: "gcr.io/kubeflow-dev/code-search:gpu-devel",
+      imagePullSecrets: [],
+    },
+
+    "t2t-gh-summarizer": {
+      "name": "github_function_summarizer",
+      "problem": "github_function_summarizer",
+      "dataDir": "gs://kubeflow-dev/code-search/raw_data",
+      "outputDir": "gs://kubeflow-dev/code-search/train",
+      "model": "transformer",
+      "hparams_set": "transformer_base"
+    },
+  },
+}
diff --git a/code_search/kubeflow/components/t2t-gh-summarizer.jsonnet b/code_search/kubeflow/components/t2t-gh-summarizer.jsonnet
new file mode 100644
index 00000000..bcbc8132
--- /dev/null
+++ b/code_search/kubeflow/components/t2t-gh-summarizer.jsonnet
@@ -0,0 +1,7 @@
+local k = import "k.libsonnet";
+local t2tJob = import "t2t-job.libsonnet";
+
+local env = std.extVar("__ksonnet/environments");
+local params = std.extVar("__ksonnet/params").components["t2t-gh-summarizer"];
+
+std.prune(k.core.v1.list.new([t2tJob.parts(params, env).job]))
diff --git a/code_search/kubeflow/components/t2t-job.libsonnet b/code_search/kubeflow/components/t2t-job.libsonnet
new file mode 100644
index 00000000..23d58f85
--- /dev/null
+++ b/code_search/kubeflow/components/t2t-job.libsonnet
@@ -0,0 +1,67 @@
+local tfJob = import "kubeflow/tf-job/tf-job.libsonnet";
+local baseParams = std.extVar("__ksonnet/params").components["t2t-job"];
+
+{
+  parts(newParams, env):: {
+    local params = baseParams + newParams,
+
+    local t2tCmd = {
+      datagen: [
+        "t2t-datagen",
+        "--problem=" + params.problem,
+        "--data_dir=" + params.dataDir,
+      ],
+
+      trainer: [
+        "t2t-trainer",
+        "--problem=" + params.problem,
+        "--data_dir=" + params.dataDir,
+        "--output_dir=" + params.outputDir,
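+        // Numeric params (train_steps, eval_steps, GPU and replica counts) are rendered into flag strings with std.toString below.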
"--model=" + params.model, + "--hparams_set=" + params.hparams_set, + "--train_steps=" + std.toString(params.train_steps), + ], + + workerBase: self.trainer + [ + "--schedule=train", + "--ps_gpu=" + std.toString(params.numPsGpu), + "--worker_gpu=" + std.toString(params.numWorkerGpu), + "--worker_replicas=" + std.toString(params.numWorker + params.numMaster), + "--ps_replicas=" + std.toString(params.numPs), + "--eval_steps=" + std.toString(params.eval_steps), + ], + + ps: self.trainer + [ + "--schedule=run_std_server", + "--ps_job=/job:ps", + ], + + worker: self.workerBase + [ + "--worker_job=/job:worker", + ], + + master: self.workerBase + [ + "--worker_job=/job:master", + ], + }, + + local terminationPolicy = if params.numMaster == 1 + then tfJob.parts.tfJobTerminationPolicy("MASTER", 0) + else tfJob.parts.tfJobTerminationPolicy("WORKER", 0), + + local workerImage = if params.numWorkerGpu > 0 then params.imageGpu else params.image, + local psImage = if params.numPsGpu > 0 then params.imageGpu else params.image, + + job:: + tfJob.parts.tfJob( + params.name, + env.namespace, + [ + tfJob.parts.tfJobReplica("MASTER", params.numMaster, t2tCmd.master, workerImage, params.imagePullSecrets, params.numWorkerGpu), + tfJob.parts.tfJobReplica("WORKER", params.numWorker, t2tCmd.worker, workerImage, params.imagePullSecrets, params.numWorkerGpu), + tfJob.parts.tfJobReplica("PS", params.numPs, t2tCmd.ps, psImage, params.imagePullSecrets, params.numPsGpu), + ], + terminationPolicy + ), + }, +} diff --git a/code_search/kubeflow/environments/base.libsonnet b/code_search/kubeflow/environments/base.libsonnet new file mode 100644 index 00000000..a129affb --- /dev/null +++ b/code_search/kubeflow/environments/base.libsonnet @@ -0,0 +1,4 @@ +local components = std.extVar("__ksonnet/components"); +components + { + // Insert user-specified overrides here. +} diff --git a/code_search/kubeflow/vendor/kubeflow/tf-job/README.md b/code_search/kubeflow/vendor/kubeflow/tf-job/README.md new file mode 100644 index 00000000..9825b264 --- /dev/null +++ b/code_search/kubeflow/vendor/kubeflow/tf-job/README.md @@ -0,0 +1,91 @@ + + +**Table of Contents** *generated with [DocToc](https://github.com/thlorenz/doctoc)* + +- [tf-job](#tf-job) + - [Quickstart](#quickstart) + - [Using the library](#using-the-library) + - [io.ksonnet.pkg.tf-job](#ioksonnetpkgtf-job) + - [Example](#example) + - [Parameters](#parameters) + - [Example](#example-1) + - [Parameters](#parameters-1) + + + +# tf-job + +> Prototypes for running TensorFlow jobs. + + +* [Quickstart](#quickstart) +* [Using Prototypes](#using-prototypes) + * [io.ksonnet.pkg.tf-job](#io.ksonnet.pkg.tf-job) + * [io.ksonnet.pkg.tf-cnn](#io.ksonnet.pkg.tf-cnn) + +## Quickstart + +*The following commands use the `io.ksonnet.pkg.tf-job` prototype to generate Kubernetes YAML for tf-job, and then deploys it to your Kubernetes cluster.* + +First, create a cluster and install the ksonnet CLI (see root-level [README.md](rootReadme)). + +If you haven't yet created a [ksonnet application](linkToSomewhere), do so using `ks init `. + +Finally, in the ksonnet application directory, run the following: + +```shell +# Expand prototype as a Jsonnet file, place in a file in the +# `components/` directory. (YAML and JSON are also available.) +$ ks prototype use io.ksonnet.pkg.tf-job tf-job \ + --namespace default \ + --name tf-job + +# Apply to server. 
+$ ks apply -f tf-job.jsonnet
+```
+
+## Using the library
+
+The library files for tf-job define a set of relevant *parts* (_e.g._, deployments, services, secrets, and so on) that can be combined to configure tf-job for a wide variety of scenarios. For example, a database like Redis may need a secret to hold the user password, or it may have no password if it's acting as a cache.
+
+This library provides a set of pre-fabricated "flavors" (or "distributions") of tf-job, each of which is configured for a different use case. These are captured as ksonnet *prototypes*, which allow users to interactively customize these distributions for their specific needs.
+
+These prototypes, as well as how to use them, are enumerated below.
+
+### io.ksonnet.pkg.tf-job
+
+A TensorFlow job (could be training or evaluation).
+#### Example
+
+```shell
+# Expand prototype as a Jsonnet file, place in a file in the
+# `components/` directory. (YAML and JSON are also available.)
+$ ks prototype use io.ksonnet.pkg.tf-job tf-job \
+  --name YOUR_NAME_HERE
+```
+
+#### Parameters
+
+The available options to pass to the prototype are:
+
+* `--name=<name>`: Name to give to each of the components [string]
+### io.ksonnet.pkg.tf-cnn
+
+A TensorFlow CNN Benchmarking job.
+#### Example
+
+```shell
+# Expand prototype as a Jsonnet file, place in a file in the
+# `components/` directory. (YAML and JSON are also available.)
+$ ks prototype use io.ksonnet.pkg.tf-cnn tf-job \
+  --name YOUR_NAME_HERE
+```
+
+#### Parameters
+
+The available options to pass to the prototype are:
+
+* `--name=<name>`: Name for the job. [string]
+
+
+[rootReadme]: https://github.com/ksonnet/mixins
diff --git a/code_search/kubeflow/vendor/kubeflow/tf-job/parts.yaml b/code_search/kubeflow/vendor/kubeflow/tf-job/parts.yaml
new file mode 100644
index 00000000..905ff0a9
--- /dev/null
+++ b/code_search/kubeflow/vendor/kubeflow/tf-job/parts.yaml
@@ -0,0 +1,35 @@
+{
+  "name": "tf-job",
+  "apiVersion": "0.0.1",
+  "kind": "ksonnet.io/parts",
+  "description": "Prototypes for running TensorFlow jobs.\n",
+  "author": "kubeflow team",
+  "contributors": [
+    {
+      "name": "Jeremy Lewi",
+      "email": "jlewi@google.com"
+    }
+  ],
+  "repository": {
+    "type": "git",
+    "url": "https://github.com/kubeflow/kubeflow"
+  },
+  "bugs": {
+    "url": "https://github.com/kubeflow/kubeflow/issues"
+  },
+  "keywords": [
+    "kubeflow",
+    "tensorflow",
+    "database"
+  ],
+  "quickStart": {
+    "prototype": "io.ksonnet.pkg.tf-job",
+    "componentName": "tf-job",
+    "flags": {
+      "name": "tf-job",
+      "namespace": "default"
+    },
+    "comment": "Run TensorFlow Job"
+  },
+  "license": "Apache 2.0"
+}
diff --git a/code_search/kubeflow/vendor/kubeflow/tf-job/prototypes/tf-job.jsonnet b/code_search/kubeflow/vendor/kubeflow/tf-job/prototypes/tf-job.jsonnet
new file mode 100644
index 00000000..f3e5e68b
--- /dev/null
+++ b/code_search/kubeflow/vendor/kubeflow/tf-job/prototypes/tf-job.jsonnet
@@ -0,0 +1,65 @@
+// @apiVersion 0.1
+// @name io.ksonnet.pkg.tf-job
+// @description A TensorFlow job (could be training or evaluation).
+// @shortDescription A TensorFlow job.
+// @param name string Name to give to each of the components
+// @optionalParam namespace string null Namespace to use for the components. It is automatically inherited from the environment if not set.
+// @optionalParam args string null Comma separated list of arguments to pass to the job
+// @optionalParam image string null The docker image to use for the job.
+// @optionalParam image_gpu string null The docker image to use when using GPUs.
+// @optionalParam image_pull_secrets string null Comma-delimited list of secret names to use credentials in pulling your docker images.
+// @optionalParam num_masters number 1 The number of masters to use
+// @optionalParam num_ps number 0 The number of ps to use
+// @optionalParam num_workers number 0 The number of workers to use
+// @optionalParam num_gpus number 0 The number of GPUs to attach to workers.
+
+// TODO(https://github.com/ksonnet/ksonnet/issues/235): ks param set args won't work if the arg starts with "--".
+
+local k = import "k.libsonnet";
+local tfJob = import "kubeflow/tf-job/tf-job.libsonnet";
+// updatedParams uses the environment namespace if
+// the namespace parameter is not explicitly set
+local updatedParams = params {
+  namespace: if params.namespace == "null" then env.namespace else params.namespace,
+};
+
+local name = import "param://name";
+local namespace = updatedParams.namespace;
+
+local argsParam = import "param://args";
+local args =
+  if argsParam == "null" then
+    []
+  else
+    std.split(argsParam, ",");
+
+local image = import "param://image";
+local imageGpu = import "param://image_gpu";
+local imagePullSecrets = import "param://image_pull_secrets";
+local numMasters = import "param://num_masters";
+local numPs = import "param://num_ps";
+local numWorkers = import "param://num_workers";
+local numGpus = import "param://num_gpus";
+
+local terminationPolicy = if numMasters == 1 then
+  tfJob.parts.tfJobTerminationPolicy("MASTER", 0)
+else
+  tfJob.parts.tfJobTerminationPolicy("WORKER", 0);
+
+local workerSpec = if numGpus > 0 then
+  tfJob.parts.tfJobReplica("WORKER", numWorkers, args, imageGpu, imagePullSecrets, numGpus)
+else
+  tfJob.parts.tfJobReplica("WORKER", numWorkers, args, image, imagePullSecrets);
+
+std.prune(k.core.v1.list.new([
+  tfJob.parts.tfJob(
+    name,
+    namespace,
+    [
+      tfJob.parts.tfJobReplica("MASTER", numMasters, args, image, imagePullSecrets),
+      workerSpec,
+      tfJob.parts.tfJobReplica("PS", numPs, args, image, imagePullSecrets),
+    ],
+    terminationPolicy
+  ),
+]))
diff --git a/code_search/kubeflow/vendor/kubeflow/tf-job/tf-job.libsonnet b/code_search/kubeflow/vendor/kubeflow/tf-job/tf-job.libsonnet
new file mode 100644
index 00000000..2ac2f744
--- /dev/null
+++ b/code_search/kubeflow/vendor/kubeflow/tf-job/tf-job.libsonnet
@@ -0,0 +1,59 @@
+local k = import "k.libsonnet";
+local util = import "util.libsonnet";
+
+{
+  parts:: {
+    tfJobReplica(replicaType, number, args, image, imagePullSecrets=[], numGpus=0)::
+      local baseContainer = {
+        image: image,
+        name: "tensorflow",
+      };
+      local containerArgs = if std.length(args) > 0 then
+        {
+          args: args,
+        }
+      else {};
+      local resources = if numGpus > 0 then {
+        resources: {
+          limits: {
+            "nvidia.com/gpu": numGpus,
+          },
+        },
+      } else {};
+      if number > 0 then
+        {
+          replicas: number,
+          template: {
+            spec: {
+              imagePullSecrets: [{ name: secret } for secret in util.toArray(imagePullSecrets)],
+              containers: [
+                baseContainer + containerArgs + resources,
+              ],
+              restartPolicy: "OnFailure",
+            },
+          },
+          tfReplicaType: replicaType,
+        }
+      else {},
+
+    tfJobTerminationPolicy(replicaName, replicaIndex):: {
+      chief: {
+        replicaName: replicaName,
+        replicaIndex: replicaIndex,
+      },
+    },
+
+    tfJob(name, namespace, replicas, tp):: {
+      apiVersion: "kubeflow.org/v1alpha1",
+      kind: "TFJob",
+      metadata: {
+        name: name,
+        namespace: namespace,
+      },
+      spec: {
+        replicaSpecs: replicas,
+        terminationPolicy: tp,
+      },
+    },
+  },
+}
diff --git a/code_search/kubeflow/vendor/kubeflow/tf-job/util.libsonnet b/code_search/kubeflow/vendor/kubeflow/tf-job/util.libsonnet
new file mode 100644
index 00000000..d5458f40
--- /dev/null
+++ b/code_search/kubeflow/vendor/kubeflow/tf-job/util.libsonnet
@@ -0,0 +1,7 @@
+{
+  // Convert a comma-delimited string to an array.
+  toArray(str)::
+    if std.type(str) == "string" && str != "null" && std.length(str) > 0 then
+      std.split(str, ",")
+    else [],
+}
diff --git a/code_search/language_task/Dockerfile b/code_search/language_task/Dockerfile
index 489e8802..a97a0722 100644
--- a/code_search/language_task/Dockerfile
+++ b/code_search/language_task/Dockerfile
@@ -6,8 +6,15 @@ FROM tensorflow/tensorflow:$BASE_IMAGE_TAG

 ADD requirements.txt /

-RUN pip3 --no-cache-dir install -r /requirements.txt
+RUN pip3 --no-cache-dir install -r /requirements.txt &&\
+    apt-get update && apt-get install -y jq &&\
+    rm -rf /var/lib/apt/lists/*

 VOLUME ["/data", "/output"]

 ADD t2t_problems/* /t2t_problems/
+ADD t2t-entrypoint.sh /usr/local/sbin/t2t-entrypoint
+
+ENV T2T_USR_DIR=/t2t_problems
+
+ENTRYPOINT ["/usr/local/sbin/t2t-entrypoint"]
diff --git a/code_search/language_task/build_image.sh b/code_search/language_task/build_image.sh
index b0458e46..d7a29ced 100755
--- a/code_search/language_task/build_image.sh
+++ b/code_search/language_task/build_image.sh
@@ -2,8 +2,9 @@

 set -e

+PROJECT=${PROJECT:-}
 BASE_IMAGE_TAG=${BASE_IMAGE_TAG:-1.8.0-py3} # 1.8.0-gpu-py3 for GPU-based image
-BUILD_IMAGE_TAG=${BUILD_IMAGE_TAG:-semantic-code-search:devel}
+BUILD_IMAGE_TAG=${BUILD_IMAGE_TAG:-code-search:devel}

 # Directory of this script used as docker context
 _SCRIPT_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )"
@@ -12,4 +13,10 @@ pushd "$_SCRIPT_DIR"

 docker build -t ${BUILD_IMAGE_TAG} --build-arg BASE_IMAGE_TAG=${BASE_IMAGE_TAG} .

+# Push image to GCR if PROJECT is set
+if [[ ! -z "${PROJECT}" ]]; then
+  docker tag ${BUILD_IMAGE_TAG} gcr.io/${PROJECT}/${BUILD_IMAGE_TAG}
+  docker push gcr.io/${PROJECT}/${BUILD_IMAGE_TAG}
+fi
+
 popd
diff --git a/code_search/language_task/run.sh b/code_search/language_task/run.sh
deleted file mode 100755
index eafadc87..00000000
--- a/code_search/language_task/run.sh
+++ /dev/null
@@ -1,36 +0,0 @@
-#!/usr/bin/env bash
-
-set -e
-
-# Script Variables
-IMAGE_TAG=${IMAGE_TAG:-semantic-code-search:devel}
-DOCKER_ENTRYPOINT=${DOCKER_ENTRYPOINT:-}
-
-MOUNT_DATA_DIR=${MOUNT_DATA_DIR:-}
-MOUNT_OUTPUT_DIR=${MOUNT_OUTPUT_DIR:-}
-
-DATA_DIR=${DATA_DIR:-/data}
-OUTPUT_DIR=${OUTPUT_DIR:-/output}
-
-# Internal Variables
-_DOCKER_RUN_OPTS="-it --rm --entrypoint=${DOCKER_ENTRYPOINT}"
-_DOCKER_CMD="${@} --t2t_usr_dir=/t2t_problems --tmp_dir=/tmp --data_dir=${DATA_DIR} --output_dir=${OUTPUT_DIR}"
-
-if [[ -z ${DOCKER_ENTRYPOINT} ]]; then
-  echo "ERROR: Missing DOCKER_ENTRYPOINT environment variable! Use 't2t-datagen' or 't2t-trainer'"
-  exit 1
-fi
-
-# Mount local directories (if specified)
-if [[ ! -z ${MOUNT_DATA_DIR} ]]; then
-  _DOCKER_RUN_OPTS="${_DOCKER_RUN_OPTS} -v ${MOUNT_DATA_DIR}:${DATA_DIR}:rw"
-fi
-
-if [[ ! -z ${MOUNT_OUTPUT_DIR} ]]; then
-  _DOCKER_RUN_OPTS="${_DOCKER_RUN_OPTS} -v ${MOUNT_OUTPUT_DIR}:${OUTPUT_DIR}:rw"
-fi
-
-_FINAL_CMD="docker run ${_DOCKER_RUN_OPTS} ${IMAGE_TAG} ${_DOCKER_CMD}"
-
-echo "${_FINAL_CMD}"
-eval "${_FINAL_CMD}"
diff --git a/code_search/language_task/t2t-entrypoint.sh b/code_search/language_task/t2t-entrypoint.sh
new file mode 100755
index 00000000..c501d7a6
--- /dev/null
+++ b/code_search/language_task/t2t-entrypoint.sh
@@ -0,0 +1,29 @@
+#!/usr/bin/env bash
+
+set -e
+
+T2T_USR_DIR=${T2T_USR_DIR:-}
+TARGET_BIN="${1}"
+TARGET_BIN_OPTS="--tmp_dir=/tmp"
+
+# Add T2T user directory for new problems
+if [[ ! -z "${T2T_USR_DIR}" ]]; then
+  TARGET_BIN_OPTS="${TARGET_BIN_OPTS} --t2t_usr_dir=${T2T_USR_DIR}"
+fi
+
+# Process TF_CONFIG to pass distributed training parameters to `t2t-trainer`
+TF_CONFIG=${TF_CONFIG:-}
+if [[ ! -z "${TF_CONFIG}" ]]; then
+  WORKER_ID=$(echo "${TF_CONFIG}" | jq ".task.index")
+  WORKER_TYPE=$(echo "${TF_CONFIG}" | jq -r ".task.type")
+  MASTER_INSTANCE=$(echo "${TF_CONFIG}" | jq -r ".cluster.${WORKER_TYPE}[${WORKER_ID}]")
+
+  if [[ "${TARGET_BIN}" = "t2t-trainer" ]]; then
+    TARGET_BIN_OPTS="${TARGET_BIN_OPTS} --master=grpc://${MASTER_INSTANCE} --worker_id=${WORKER_ID}"
+  fi
+fi
+
+EVAL_CMD="${TARGET_BIN} ${TARGET_BIN_OPTS} ${@:2}"
+
+echo "Running command: '${EVAL_CMD}'"
+eval "${EVAL_CMD}"
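Note: for reference, a minimal sketch of how the new `t2t-entrypoint.sh` consumes `TF_CONFIG` inside the container image built above (the real value is injected into each pod by the TFJob operator; the host names and ports below are illustrative only):

```
$ export TF_CONFIG='{"cluster":{"master":["tfjob-master-0:2222"],"ps":["tfjob-ps-0:2222"],"worker":["tfjob-worker-0:2222"]},"task":{"type":"worker","index":0}}'
$ t2t-entrypoint t2t-trainer --problem=github_function_summarizer
# With T2T_USR_DIR=/t2t_problems (set in the Dockerfile), jq extracts
# WORKER_TYPE=worker, WORKER_ID=0 and MASTER_INSTANCE=tfjob-worker-0:2222,
# so the script evaluates:
#   t2t-trainer --tmp_dir=/tmp --t2t_usr_dir=/t2t_problems \
#     --master=grpc://tfjob-worker-0:2222 --worker_id=0 \
#     --problem=github_function_summarizer
```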