mirror of https://github.com/kubeflow/examples.git
Disable Distributed Training (#207)
* Upgrade TFJob and Ksonnet app
* Container name should be tensorflow. See #563.
* Working single node training and serving on Kubeflow
* Add issue link for fixme
* Remove redundant create secrets and use Kubeflow provided secrets
This commit is contained in:
parent 091eacb4f6
commit e9e844022e
@@ -38,16 +38,6 @@ $ gcloud auth configure-docker
 See [Google Cloud Docs](https://cloud.google.com/docs/) for more.
 
-### Create Kubernetes Secrets
-
-This is needed for deployed pods in the Kubernetes cluster to access Google Cloud resources.
-
-```
-$ PROJECT=my-project ./create_secrets.sh
-```
-
-**NOTE**: Use `create_secrets.sh -d` to remove any side-effects of the above step.
-
 ### Python Environment Setup
 
 This demo needs multiple Python versions and `virtualenv` is an easy way to
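The removed step is superseded by the secrets a standard Kubeflow deployment already provisions; the `user-gcp-sa` secret referenced later in this commit is assumed to exist. A minimal sanity check, assuming the `kubeflow` namespace from this app's environment:

```
$ kubectl get secret user-gcp-sa -n kubeflow -o jsonpath='{.metadata.name}'
```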
@@ -74,7 +64,7 @@ See [Virtualenv Docs](https://virtualenv.pypa.io/en/stable/) for more.
 To install dependencies, run the following commands
 
 ```
-(env2.7) $ pip install https://github.com/kubeflow/batch-predict/tarball/master
+(env2.7) $ pip install https://github.com/activatedgeek/batch-predict/tarball/fix-value-provider
 (env2.7) $ pip install src/
 ```
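For context, the `(env2.7)` prompt implies a Python 2.7 virtualenv; a sketch of creating one, with the environment name inferred from the prompt:

```
$ virtualenv -p python2.7 env2.7
$ source env2.7/bin/activate
```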
@@ -1,57 +0,0 @@
-#!/usr/bin/env bash
-
-##
-# This script creates all the necessary service accounts and permissions
-# needed for the training jobs to pull private images from
-# Google Cloud Registry and access Google Cloud Storage. To
-# undo all the changes made, add a "-d" flag while executing the
-# script.
-#
-
-set -ex
-
-export PROJECT=${PROJECT:-}
-
-if [[ -z "${PROJECT}" ]]; then
-  echo "PROJECT environment variable missing!"
-  exit 1
-fi
-
-export SA_NAME=code-search-access
-export SA_EMAIL=${SA_NAME}@${PROJECT}.iam.gserviceaccount.com
-export SA_KEY_FILE=${SA_EMAIL}.key.json
-
-
-if [[ "${1}" = "-d" ]]; then
-  gcloud projects remove-iam-policy-binding ${PROJECT} \
-    --member=serviceAccount:${SA_EMAIL} \
-    --role=roles/storage.admin
-
-  gcloud iam service-accounts delete ${SA_EMAIL} --quiet
-
-  rm -f ${SA_KEY_FILE}
-
-  kubectl delete secret gcp-credentials gcp-registry-credentials
-
-  exit 0
-fi
-
-
-gcloud iam service-accounts create ${SA_NAME} --display-name ${SA_EMAIL}
-
-gcloud projects add-iam-policy-binding ${PROJECT} \
-  --member=serviceAccount:${SA_EMAIL} \
-  --role=roles/storage.admin
-
-gcloud iam service-accounts keys create ${SA_KEY_FILE} \
-  --iam-account=${SA_EMAIL}
-
-kubectl create secret docker-registry gcp-registry-credentials \
-  --docker-server=https://gcr.io \
-  --docker-username=_json_key \
-  --docker-password="$(cat ${SA_KEY_FILE})" \
-  --docker-email=${SA_EMAIL}
-
-kubectl create secret generic gcp-credentials \
-  --from-file=key.json="${SA_KEY_FILE}"
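For reference, the removed helper was driven by a single environment variable and an optional flag, as the old README documented:

```
$ PROJECT=my-project ./create_secrets.sh      # create the service account, key, and secrets
$ PROJECT=my-project ./create_secrets.sh -d   # undo every side effect
```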
@@ -8,23 +8,36 @@
 
 set -ex
 
-GPU=${GPU:-0}
-BASE_IMAGE_TAG=$([[ "${GPU}" = "1" ]] && echo "1.8.0-gpu" || echo "1.8.0")
 BUILD_IMAGE_UUID=$(python3 -c 'import uuid; print(uuid.uuid4().hex[:7]);')
-BUILD_IMAGE_TAG="code-search:v$(date +%Y%m%d)$([[ ${GPU} = "1" ]] && echo '-gpu' || echo '')-${BUILD_IMAGE_UUID}"
+BUILD_IMAGE_TAG="code-search:v$(date +%Y%m%d)-${BUILD_IMAGE_UUID}"
 
 # Directory of this script used for path references
 _SCRIPT_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )"
 
 pushd "${_SCRIPT_DIR}"
 
-docker build -f "${_SCRIPT_DIR}/Dockerfile" -t ${BUILD_IMAGE_TAG} --build-arg BASE_IMAGE_TAG=${BASE_IMAGE_TAG} "${_SCRIPT_DIR}/../.."
+# Build CPU image
+docker build -f "${_SCRIPT_DIR}/Dockerfile" \
+             -t ${BUILD_IMAGE_TAG} \
+             --build-arg BASE_IMAGE_TAG=1.8.0 \
+             "${_SCRIPT_DIR}/../.."
 
-# Push image to GCR PROJECT available
+# Build GPU image
+docker build -f "${_SCRIPT_DIR}/Dockerfile" \
+             -t ${BUILD_IMAGE_TAG}-gpu \
+             --build-arg BASE_IMAGE_TAG=1.8.0-gpu \
+             "${_SCRIPT_DIR}/../.."
+
+# Push images to GCR Project if available
 PROJECT=${PROJECT:-}
 if [[ ! -z "${PROJECT}" ]]; then
+  # Tag and push CPU image
   docker tag ${BUILD_IMAGE_TAG} gcr.io/${PROJECT}/${BUILD_IMAGE_TAG}
   docker push gcr.io/${PROJECT}/${BUILD_IMAGE_TAG}
+
+  # Tag and push GPU image
+  docker tag ${BUILD_IMAGE_TAG}-gpu gcr.io/${PROJECT}/${BUILD_IMAGE_TAG}-gpu
+  docker push gcr.io/${PROJECT}/${BUILD_IMAGE_TAG}-gpu
 fi
 
 popd
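A sketch of how the updated script would be invoked; the script file name here is hypothetical (the diff does not show it), and `PROJECT` is only required for the push step:

```
$ PROJECT=my-gcp-project ./build_image.sh
```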
@@ -16,11 +16,13 @@ TF_CONFIG=${TF_CONFIG:-}
 if [[ ! -z "${TF_CONFIG}" ]]; then
   WORKER_ID=$(echo "${TF_CONFIG}" | jq ".task.index")
   WORKER_TYPE=$(echo "${TF_CONFIG}" | jq -r ".task.type")
-  MASTER_INSTANCE=$(echo "${TF_CONFIG}" | jq -r ".cluster.master[0]")
+  MASTER_INSTANCE=$(echo "${TF_CONFIG}" | jq -r ".cluster.${WORKER_TYPE}[${WORKER_ID}]")
 
-  if [[ "${TARGET_BIN}" = "t2t-trainer" ]]; then
-    TARGET_BIN_OPTS="${TARGET_BIN_OPTS} --master=grpc://${MASTER_INSTANCE} --worker_id=${WORKER_ID}"
-  fi
+  # FIXME(sanyamkapoor): Distributed training hangs. See kubeflow/examples#208.
+  # if [[ "${TARGET_BIN}" = "t2t-trainer" ]]; then
+  #   TARGET_BIN_OPTS="${TARGET_BIN_OPTS} --master=grpc://${MASTER_INSTANCE} --worker_id=${WORKER_ID}"
+  # fi
+  unset TF_CONFIG
 fi
 
 EVAL_CMD="${TARGET_BIN} ${TARGET_BIN_OPTS} ${@:2}"
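A worked example of the jq lookups above, against a hand-written `TF_CONFIG` (the shape follows the TFJob convention; host names are illustrative):

```
$ TF_CONFIG='{"cluster":{"worker":["cs-0:2222","cs-1:2222"]},"task":{"type":"worker","index":1}}'
$ echo "${TF_CONFIG}" | jq ".task.index"            # 1
$ echo "${TF_CONFIG}" | jq -r ".task.type"          # worker
$ echo "${TF_CONFIG}" | jq -r ".cluster.worker[1]"  # cs-1:2222
```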
@@ -1,37 +1,23 @@
-apiVersion: 0.1.0
+apiVersion: 0.2.0
 environments:
-  code-search:
+  kf-cs:
     destination:
       namespace: kubeflow
-      server: https://35.193.190.6
+      server: https://35.232.164.190
     k8sVersion: v1.9.6
-    path: code-search
+    path: kf-cs
 kind: ksonnet.io/app
 libraries:
-  tf-job:
-    gitVersion:
-      commitSha: d8e19a4762406bb454453331f52ed5a4433c0df9
-      refSpec: master
-    name: tf-job
-    registry: kubeflow
   tf-serving:
-    gitVersion:
-      commitSha: e1b2aee865866b2e7e4f8c41b34ae03b4c4bb0db
-      refSpec: master
     name: tf-serving
     registry: kubeflow
+    version: e95f94a1a97a0974ada734895d590b5ba565fa77
 name: kubeflow
 registries:
   incubator:
-    gitVersion:
-      commitSha: 40285d8a14f1ac5787e405e1023cf0c07f6aa28c
-      refSpec: master
     protocol: github
     uri: github.com/ksonnet/parts/tree/master/incubator
   kubeflow:
-    gitVersion:
-      commitSha: d8e19a4762406bb454453331f52ed5a4433c0df9
-      refSpec: master
     protocol: github
-    uri: github.com/kubeflow/kubeflow/tree/master/kubeflow
+    uri: https://github.com/kubeflow/kubeflow/tree/v0.2.2/kubeflow
 version: 0.0.1
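A sketch of how the renamed environment would be registered, with values taken from the file above (flags per standard ksonnet usage):

```
$ ks env add kf-cs --server=https://35.232.164.190 --namespace=kubeflow
```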
@@ -2,6 +2,7 @@
   global: {
     // User-defined global parameters; accessible to all component and environments, Ex:
     // replicas: 4,
+    t2tWorkingDir: "gs://kubeflow-examples/t2t-code-search/20180802",
   },
   components: {
     // Component-level parameters, defined initially from 'ks prototype use ...'
@@ -9,8 +10,7 @@
     "t2t-job": {
       jobType: "trainer",
 
-      numMaster: 1,
-      numWorker: 0,
+      numWorker: 1,
       numPs: 0,
       numWorkerGpu: 0,
       numPsGpu: 0,
@@ -18,8 +18,8 @@
       train_steps: 100,
       eval_steps: 10,
 
-      image: "gcr.io/kubeflow-dev/code-search:v20180719-f04a4b7",
-      imageGpu: "gcr.io/kubeflow-dev/code-search:v20180719-gpu-9b8b4a8",
+      image: "gcr.io/kubeflow-dev/code-search:v20180802-c622aac",
+      imageGpu: "gcr.io/kubeflow-dev/code-search:v20180802-c622aac-gpu",
       imagePullSecrets: [],
 
       dataDir: "null",
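These defaults are typically overridden per component with `ks param set`; a sketch using names from the file above:

```
$ ks param set t2t-job numWorker 1
$ ks param set t2t-job image gcr.io/kubeflow-dev/code-search:v20180802-c622aac
```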
@@ -28,18 +28,23 @@
       hparams_set: "null",
     },
 
     "t2t-code-search-datagen": {
       jobType: "datagen",
 
       name: "t2t-code-search-datagen",
 
       problem: "github_function_docstring",
+      dataDir: $.global.t2tWorkingDir + "/data",
     },
 
     "t2t-code-search-trainer": {
       jobType: "trainer",
+      numWorker: 2,
+      numPs: 1,
+      // numWorkerGpu: 1,
+      // numPsGpu: 1,
 
       name: "t2t-code-search-trainer",
 
       problem: "github_function_docstring",
-      dataDir: "gs://kubeflow-examples/t2t-code-search/data",
-      outputDir: "gs://kubeflow-examples/t2t-code-search/output",
+      dataDir: $.global.t2tWorkingDir + "/data",
+      outputDir: $.global.t2tWorkingDir + "/output",
       model: "similarity_transformer",
       hparams_set: "transformer_tiny",
     },
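With `t2tWorkingDir` set as above, the Jsonnet concatenations resolve to `gs://kubeflow-examples/t2t-code-search/20180802/data` and `.../output`. A quick check, assuming access to the bucket:

```
$ gsutil ls gs://kubeflow-examples/t2t-code-search/20180802/
```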
@@ -50,8 +55,8 @@
       name: "t2t-code-search-exporter",
 
       problem: "github_function_docstring",
-      dataDir: "gs://kubeflow-examples/t2t-code-search/data",
-      outputDir: "gs://kubeflow-examples/t2t-code-search/output",
+      dataDir: $.global.t2tWorkingDir + "/data",
+      outputDir: $.global.t2tWorkingDir + "/output",
       model: "similarity_transformer",
       hparams_set: "transformer_tiny",
     },
@@ -59,8 +64,8 @@
     "t2t-code-search-serving": {
       name: "t2t-code-search",
 
-      modelName: "t2t_code_search",
-      modelPath: "gs://kubeflow-examples/t2t-code-search/output/export/Servo",
+      modelName: "t2t-code-search",
+      modelPath: $.global.t2tWorkingDir + "/output/export/Servo",
       modelServerImage: "gcr.io/kubeflow-images-public/tensorflow-serving-1.8:latest",
       cloud: "gcp",
       gcpCredentialSecretName: "gcp-credentials",
@@ -0,0 +1,7 @@
+local k = import "k.libsonnet";
+local t2tJob = import "t2t-job.libsonnet";
+
+local env = std.extVar("__ksonnet/environments");
+local params = std.extVar("__ksonnet/params").components["t2t-code-search-datagen"];
+
+std.prune(k.core.v1.list.new([t2tJob.parts(params, env).job]))
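A sketch of submitting the new component, using the environment name from app.yaml:

```
$ ks apply kf-cs -c t2t-code-search-datagen
```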
@@ -1,9 +1,9 @@
-local tfJob = import "kubeflow/tf-job/tf-job.libsonnet";
 local baseParams = std.extVar("__ksonnet/params").components["t2t-job"];
 
 {
   getDatagenCmd(params)::
     [
       "/usr/local/sbin/t2t-entrypoint",
       "t2t-datagen",
       "--problem=" + params.problem,
       "--data_dir=" + params.dataDir,
@@ -24,36 +24,32 @@ local baseParams = std.extVar("__ksonnet/params").components["t2t-job"];
     local trainer = [
       "/usr/local/sbin/t2t-entrypoint",
       "t2t-trainer",
       "--generate_data",
       "--problem=" + params.problem,
       "--model=" + params.model,
       "--hparams_set=" + params.hparams_set,
       "--data_dir=" + params.dataDir,
       "--output_dir=" + params.outputDir,
       "--train_steps=" + std.toString(params.train_steps),
       "--eval_steps=" + std.toString(params.eval_steps),
       "--t2t_usr_dir=/app/code_search/t2t",
     ],
 
-    local workerBase = trainer + [
+    worker: trainer,
+
+    worker_dist: trainer + [
       "--schedule=train",
       "--ps_gpu=" + std.toString(params.numPsGpu),
       "--worker_gpu=" + std.toString(params.numWorkerGpu),
-      "--worker_replicas=" + std.toString(params.numWorker + params.numMaster),
+      "--worker_replicas=" + std.toString(params.numWorker),
       "--ps_replicas=" + std.toString(params.numPs),
       "--eval_steps=" + std.toString(params.eval_steps),
+      "--worker_job=/job:worker",
     ],
 
     ps: trainer + [
       "--schedule=run_std_server",
       "--ps_job=/job:ps",
     ],
 
-    worker: workerBase + [
-      "--worker_job=/job:worker",
-    ],
-
-    master: workerBase + [
-      "--worker_job=/job:master",
-    ],
   },
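Putting the list together: for the defaults above (`train_steps: 100`, `eval_steps: 10`) the plain `worker` command expands to roughly the following; the GCS paths are placeholders:

```
$ /usr/local/sbin/t2t-entrypoint t2t-trainer \
    --generate_data \
    --problem=github_function_docstring \
    --model=similarity_transformer \
    --hparams_set=transformer_tiny \
    --data_dir=gs://<working-dir>/data \
    --output_dir=gs://<working-dir>/output \
    --train_steps=100 --eval_steps=10 \
    --t2t_usr_dir=/app/code_search/t2t
```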
@@ -61,9 +57,9 @@ local baseParams = std.extVar("__ksonnet/params").components["t2t-job"];
       image: image,
       name: "tensorflow",
       [if std.length(args) > 0 then "args"]: args,
-      [if numGpus > 0 then "resources"]: {
+      resources: {
         limits: {
-          "nvidia.com/gpu": numGpus,
+          [if numGpus > 0 then "nvidia.com/gpu"]: numGpus,
         },
       },
       [if std.length(env) > 0 then "env"]: env,
@@ -76,34 +72,26 @@ local baseParams = std.extVar("__ksonnet/params").components["t2t-job"];
         containers: [ containerSpec ],
         [if std.length(imagePullSecrets) > 0 then "imagePullSecrets"]: imagePullSecrets,
         [if std.length(volumes) > 0 then "volumes"]: volumes,
-        restartPolicy: "OnFailure",
+        // restartPolicy: "OnFailure",
       },
     },
-    tfReplicaType: replicaType,
   },
 
   parts(newParams, env):: {
     local params = baseParams + newParams,
 
-    local terminationPolicy = if params.numMaster == 1
-                              then tfJob.parts.tfJobTerminationPolicy("MASTER", 0)
-                              else tfJob.parts.tfJobTerminationPolicy("WORKER", 0),
-
     local workerImage = if params.numWorkerGpu > 0 then params.imageGpu else params.image,
-    local workerImagePullSecrets = [
-      { name: "gcp-registry-credentials" },
-    ],
     local workerEnv = [
       {
         name: "GOOGLE_APPLICATION_CREDENTIALS",
-        value: "/secret/gcp-credentials/key.json"
+        value: "/secret/gcp-credentials/user-gcp-sa.json"
       },
     ],
     local workerVolumes = [
       {
         name: "gcp-credentials",
         secret: {
-          secretName: "gcp-credentials",
+          secretName: "user-gcp-sa",
         },
       },
     ],
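Inside a running pod the credentials should land where the env var points; a hedged check (the pod name here is a placeholder):

```
$ kubectl exec -n kubeflow <trainer-pod-name> -- \
    sh -c 'echo $GOOGLE_APPLICATION_CREDENTIALS && ls /secret/gcp-credentials'
```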
@@ -115,26 +103,32 @@ local baseParams = std.extVar("__ksonnet/params").components["t2t-job"];
     ],
 
     local cmd = $.getTrainerCmd(params),
+    local workerCmd = if params.jobType == "exporter" then $.getExporterCmd(params)
+                      else if params.jobType == "datagen" then $.getDatagenCmd(params)
+                      else cmd.worker,
 
-    job::
-      tfJob.parts.tfJob(
-        params.name,
-        env.namespace,
-        if params.jobType == "exporter" then
-          [
-            $.tfJobReplica("MASTER", params.numMaster, $.getExporterCmd(params), workerImage, params.numWorkerGpu,
-                           workerImagePullSecrets, workerEnv, workerVolumes, workerVolumeMounts),
-          ]
-        else
-          [
-            $.tfJobReplica("MASTER", params.numMaster, cmd.master, workerImage, params.numWorkerGpu,
-                           workerImagePullSecrets, workerEnv, workerVolumes, workerVolumeMounts),
-            $.tfJobReplica("WORKER", params.numWorker, cmd.worker, workerImage, params.numWorkerGpu,
-                           workerImagePullSecrets, workerEnv, workerVolumes, workerVolumeMounts),
-            $.tfJobReplica("PS", params.numPs, cmd.ps, workerImage, params.numPsGpu,
-                           workerImagePullSecrets, workerEnv, workerVolumes, workerVolumeMounts),
-          ],
-        terminationPolicy
-      ),
+    job:: {
+      apiVersion: "kubeflow.org/v1alpha2",
+      kind: "TFJob",
+      metadata: {
+        name: params.name,
+        namespace: env.namespace,
+      },
+      spec: {
+        tfReplicaSpecs: {
+          [if params.numPs > 0 then "PS"]: $.tfJobReplica("PS", params.numPs, cmd.ps, workerImage,
+                                                          numGpus=params.numPsGpu,
+                                                          env=workerEnv,
+                                                          volumes=workerVolumes,
+                                                          volumeMounts=workerVolumeMounts),
+          [if params.numWorker > 0 then "Worker"]: $.tfJobReplica("WORKER", params.numWorker,
+                                                                  workerCmd, workerImage,
+                                                                  numGpus=params.numPsGpu,
+                                                                  env=workerEnv,
+                                                                  volumes=workerVolumes,
+                                                                  volumeMounts=workerVolumeMounts),
+        },
+      },
+    },
   },
 }
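Once applied, the v1alpha2 TFJob is an ordinary custom resource; a sketch of inspecting it, assuming the `kubeflow` namespace:

```
$ kubectl get tfjobs -n kubeflow
$ kubectl describe tfjob t2t-code-search-trainer -n kubeflow
```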
@@ -1,91 +0,0 @@
-<!-- START doctoc generated TOC please keep comment here to allow auto update -->
-<!-- DON'T EDIT THIS SECTION, INSTEAD RE-RUN doctoc TO UPDATE -->
-**Table of Contents** *generated with [DocToc](https://github.com/thlorenz/doctoc)*
-
-- [tf-job](#tf-job)
-  - [Quickstart](#quickstart)
-  - [Using the library](#using-the-library)
-    - [io.ksonnet.pkg.tf-job](#ioksonnetpkgtf-job)
-      - [Example](#example)
-      - [Parameters](#parameters)
-      - [Example](#example-1)
-      - [Parameters](#parameters-1)
-
-<!-- END doctoc generated TOC please keep comment here to allow auto update -->
-
-# tf-job
-
-> Prototypes for running TensorFlow jobs.
-
-
-* [Quickstart](#quickstart)
-* [Using Prototypes](#using-prototypes)
-  * [io.ksonnet.pkg.tf-job](#io.ksonnet.pkg.tf-job)
-  * [io.ksonnet.pkg.tf-cnn](#io.ksonnet.pkg.tf-cnn)
-
-## Quickstart
-
-*The following commands use the `io.ksonnet.pkg.tf-job` prototype to generate Kubernetes YAML for tf-job, and then deploys it to your Kubernetes cluster.*
-
-First, create a cluster and install the ksonnet CLI (see root-level [README.md](rootReadme)).
-
-If you haven't yet created a [ksonnet application](linkToSomewhere), do so using `ks init <app-name>`.
-
-Finally, in the ksonnet application directory, run the following:
-
-```shell
-# Expand prototype as a Jsonnet file, place in a file in the
-# `components/` directory. (YAML and JSON are also available.)
-$ ks prototype use io.ksonnet.pkg.tf-job tf-job \
-  --namespace default \
-  --name tf-job
-
-# Apply to server.
-$ ks apply -f tf-job.jsonnet
-```
-
-## Using the library
-
-The library files for tf-job define a set of relevant *parts* (_e.g._, deployments, services, secrets, and so on) that can be combined to configure tf-job for a wide variety of scenarios. For example, a database like Redis may need a secret to hold the user password, or it may have no password if it's acting as a cache.
-
-This library provides a set of pre-fabricated "flavors" (or "distributions") of tf-job, each of which is configured for a different use case. These are captured as ksonnet *prototypes*, which allow users to interactively customize these distributions for their specific needs.
-
-These prototypes, as well as how to use them, are enumerated below.
-
-### io.ksonnet.pkg.tf-job
-
-A TensorFlow job (could be training or evaluation).
-#### Example
-
-```shell
-# Expand prototype as a Jsonnet file, place in a file in the
-# `components/` directory. (YAML and JSON are also available.)
-$ ks prototype use io.ksonnet.pkg.tf-job tf-job \
-  --name YOUR_NAME_HERE
-```
-
-#### Parameters
-
-The available options to pass prototype are:
-
-* `--name=<name>`: Name to give to each of the components [string]
-### io.ksonnet.pkg.tf-cnn
-
-A TensorFlow CNN Benchmarking job
-#### Example
-
-```shell
-# Expand prototype as a Jsonnet file, place in a file in the
-# `components/` directory. (YAML and JSON are also available.)
-$ ks prototype use io.ksonnet.pkg.tf-cnn tf-job \
-  --name YOUR_NAME_HERE
-```
-
-#### Parameters
-
-The available options to pass prototype are:
-
-* `--name=<name>`: Name for the job. [string]
-
-
-[rootReadme]: https://github.com/ksonnet/mixins
@@ -1,35 +0,0 @@
-{
-  "name": "tf-job",
-  "apiVersion": "0.0.1",
-  "kind": "ksonnet.io/parts",
-  "description": "Prototypes for running TensorFlow jobs.\n",
-  "author": "kubeflow team <kubeflow-team@google.com>",
-  "contributors": [
-    {
-      "name": "Jeremy Lewi",
-      "email": "jlewi@google.com"
-    }
-  ],
-  "repository": {
-    "type": "git",
-    "url": "https://github.com/kubeflow/kubeflow"
-  },
-  "bugs": {
-    "url": "https://github.com/kubeflow/kubeflow/issues"
-  },
-  "keywords": [
-    "kubeflow",
-    "tensorflow",
-    "database"
-  ],
-  "quickStart": {
-    "prototype": "io.ksonnet.pkg.tf-job",
-    "componentName": "tf-job",
-    "flags": {
-      "name": "tf-job",
-      "namespace": "default"
-    },
-    "comment": "Run TensorFlow Job"
-  },
-  "license": "Apache 2.0"
-}
@@ -1,65 +0,0 @@
-// @apiVersion 0.1
-// @name io.ksonnet.pkg.tf-job
-// @description A TensorFlow job (could be training or evaluation).
-// @shortDescription A TensorFlow job.
-// @param name string Name to give to each of the components
-// @optionalParam namespace string null Namespace to use for the components. It is automatically inherited from the environment if not set.
-// @optionalParam args string null Comma separated list of arguments to pass to the job
-// @optionalParam image string null The docker image to use for the job.
-// @optionalParam image_gpu string null The docker image to use when using GPUs.
-// @optionalParam image_pull_secrets string null Comma-delimited list of secret names to use credentials in pulling your docker images.
-// @optionalParam num_masters number 1 The number of masters to use
-// @optionalParam num_ps number 0 The number of ps to use
-// @optionalParam num_workers number 0 The number of workers to use
-// @optionalParam num_gpus number 0 The number of GPUs to attach to workers.
-
-// TODO(https://github.com/ksonnet/ksonnet/issues/235): ks param set args won't work if the arg starts with "--".
-
-local k = import "k.libsonnet";
-local tfJob = import "kubeflow/tf-job/tf-job.libsonnet";
-// updatedParams uses the environment namespace if
-// the namespace parameter is not explicitly set
-local updatedParams = params {
-  namespace: if params.namespace == "null" then env.namespace else params.namespace,
-};
-
-local name = import "param://name";
-local namespace = updatedParams.namespace;
-
-local argsParam = import "param://args";
-local args =
-  if argsParam == "null" then
-    []
-  else
-    std.split(argsParam, ",");
-
-local image = import "param://image";
-local imageGpu = import "param://image_gpu";
-local imagePullSecrets = import "param://image_pull_secrets";
-local numMasters = import "param://num_masters";
-local numPs = import "param://num_ps";
-local numWorkers = import "param://num_workers";
-local numGpus = import "param://num_gpus";
-
-local terminationPolicy = if numMasters == 1 then
-  tfJob.parts.tfJobTerminationPolicy("MASTER", 0)
-else
-  tfJob.parts.tfJobTerminationPolicy("WORKER", 0);
-
-local workerSpec = if numGpus > 0 then
-  tfJob.parts.tfJobReplica("WORKER", numWorkers, args, imageGpu, imagePullSecrets, numGpus)
-else
-  tfJob.parts.tfJobReplica("WORKER", numWorkers, args, image, imagePullSecrets);
-
-std.prune(k.core.v1.list.new([
-  tfJob.parts.tfJob(
-    name,
-    namespace,
-    [
-      tfJob.parts.tfJobReplica("MASTER", numMasters, args, image, imagePullSecrets),
-      workerSpec,
-      tfJob.parts.tfJobReplica("PS", numPs, args, image, imagePullSecrets),
-    ],
-    terminationPolicy
-  ),
-]))
@@ -1,59 +0,0 @@
-local k = import "k.libsonnet";
-local util = import "util.libsonnet";
-
-{
-  parts:: {
-    tfJobReplica(replicaType, number, args, image, imagePullSecrets=[], numGpus=0)::
-      local baseContainer = {
-        image: image,
-        name: "tensorflow",
-      };
-      local containerArgs = if std.length(args) > 0 then
-        {
-          args: args,
-        }
-      else {};
-      local resources = if numGpus > 0 then {
-        resources: {
-          limits: {
-            "nvidia.com/gpu": numGpus,
-          },
-        },
-      } else {};
-      if number > 0 then
-        {
-          replicas: number,
-          template: {
-            spec: {
-              imagePullSecrets: [{ name: secret } for secret in util.toArray(imagePullSecrets)],
-              containers: [
-                baseContainer + containerArgs + resources,
-              ],
-              restartPolicy: "OnFailure",
-            },
-          },
-          tfReplicaType: replicaType,
-        }
-      else {},
-
-    tfJobTerminationPolicy(replicaName, replicaIndex):: {
-      chief: {
-        replicaName: replicaName,
-        replicaIndex: replicaIndex,
-      },
-    },
-
-    tfJob(name, namespace, replicas, tp):: {
-      apiVersion: "kubeflow.org/v1alpha1",
-      kind: "TFJob",
-      metadata: {
-        name: name,
-        namespace: namespace,
-      },
-      spec: {
-        replicaSpecs: replicas,
-        terminationPolicy: tp,
-      },
-    },
-  },
-}
@@ -1,7 +0,0 @@
-{
-  // Convert a comma-delimited string to an array.
-  toArray(str)::
-    if std.type(str) == "string" && str != "null" && std.length(str) > 0 then
-      std.split(str, ",")
-    else [],
-}
@@ -122,7 +122,6 @@
       args: [
         "/usr/bin/tensorflow_model_server",
         "--port=9000",
-        "--rest_api_port=8000",
        "--model_name=" + $.params.modelName,
        "--model_base_path=" + $.params.modelPath,
      ],
@@ -130,9 +129,6 @@
      {
        containerPort: 9000,
      },
-     {
-       containerPort: 8000,
-     },
    ],
    // TODO(jlewi): We should add readiness and liveness probes. I think the blocker is that
    // model-server doesn't have something we can use out of the box.
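With the REST port removed, only gRPC on 9000 remains. A sketch of reaching it locally; the service name is assumed to match the serving component:

```
$ kubectl port-forward -n kubeflow svc/t2t-code-search 9000:9000
```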