From 4bd30a1e68739391626ff6d9fa54838f62f032fc Mon Sep 17 00:00:00 2001
From: Sanyam Kapoor
Date: Fri, 15 Jun 2018 18:16:34 -0700
Subject: [PATCH] Language task on kubeflow (#143)

* [WIP] initialize ksonnet app
* Push images to GCR
* Upgrade Docker container to run T2T entrypoint with appropriate env vars
* Add a tf-job based t2t-job
* Fix GPU parameters
---
 code_search/README.md                         | 65 +++++++++----
 code_search/kubeflow/.gitignore               |  5 +
 code_search/kubeflow/app.yaml                 | 31 +++++++
 .../kubeflow/components/params.libsonnet      | 33 +++++++
 .../components/t2t-gh-summarizer.jsonnet      |  7 ++
 .../kubeflow/components/t2t-job.libsonnet     | 67 ++++++++++++++
 .../kubeflow/environments/base.libsonnet      |  4 +
 .../kubeflow/vendor/kubeflow/tf-job/README.md | 91 +++++++++++++++++++
 .../vendor/kubeflow/tf-job/parts.yaml         | 35 +++++++
 .../kubeflow/tf-job/prototypes/tf-job.jsonnet | 65 +++++++++++++
 .../vendor/kubeflow/tf-job/tf-job.libsonnet   | 59 ++++++++++++
 .../vendor/kubeflow/tf-job/util.libsonnet     |  7 ++
 code_search/language_task/Dockerfile          |  9 +-
 code_search/language_task/build_image.sh      |  9 +-
 code_search/language_task/run.sh              | 36 --------
 code_search/language_task/t2t-entrypoint.sh   | 29 ++++++
 16 files changed, 496 insertions(+), 56 deletions(-)
 create mode 100644 code_search/kubeflow/.gitignore
 create mode 100644 code_search/kubeflow/app.yaml
 create mode 100644 code_search/kubeflow/components/params.libsonnet
 create mode 100644 code_search/kubeflow/components/t2t-gh-summarizer.jsonnet
 create mode 100644 code_search/kubeflow/components/t2t-job.libsonnet
 create mode 100644 code_search/kubeflow/environments/base.libsonnet
 create mode 100644 code_search/kubeflow/vendor/kubeflow/tf-job/README.md
 create mode 100644 code_search/kubeflow/vendor/kubeflow/tf-job/parts.yaml
 create mode 100644 code_search/kubeflow/vendor/kubeflow/tf-job/prototypes/tf-job.jsonnet
 create mode 100644 code_search/kubeflow/vendor/kubeflow/tf-job/tf-job.libsonnet
 create mode 100644 code_search/kubeflow/vendor/kubeflow/tf-job/util.libsonnet
 delete mode 100755 code_search/language_task/run.sh
 create mode 100755 code_search/language_task/t2t-entrypoint.sh

diff --git a/code_search/README.md b/code_search/README.md
index ac8c652f..9bab5763 100644
--- a/code_search/README.md
+++ b/code_search/README.md
@@ -72,53 +72,82 @@ $ python preprocess/scripts/process_github_archive.py -i files/select_github_arc

 ## 2. Model Training

 A `Dockerfile` based on Tensorflow is provided which has all the dependencies for this part of the pipeline.
-By default, it is based off Tensorflow CPU 1.8.0 for `Python3` but can be overridden in the Docker image build using
-the following command
+By default, it is based off Tensorflow CPU 1.8.0 for `Python3` but can be overridden in the Docker image build.
+The script below builds the Docker image and optionally pushes it to Google Container Registry.

+### 2.1 Build & Push images to GCR
+
+**NOTE**: The images can be pushed to any registry of choice, but the rest of this guide assumes GCR.
+
+* Authenticate with GCR
 ```
-$ export BUILD_IMAGE_TAG=my-new-tag # (optional) to change built image tag
+$ gcloud auth configure-docker
+```
+
+* Setup environment variables
+```
+$ export PROJECT= # (optional) GCP project ID; if not set, the image is not pushed to GCR
+$ export BUILD_IMAGE_TAG=code-search:devel # (optional) to change built image tag
 $ export BASE_IMAGE_TAG=1.8.0-gpu-py3 # (optional) for GPU base image
+```
+
+* Build and push the image
+```
 $ ./language_task/build_image.sh
 ```

-### 2.1 Function Summarizer
+See [GCR Pushing and Pulling Images](https://cloud.google.com/container-registry/docs/pushing-and-pulling) for more.
+
+
+### 2.2 Train Locally
+
+**WARNING**: The container might run out of memory and be killed.
+
+#### 2.2.1 Function Summarizer
 This part generates a model to summarize functions into docstrings using the data generated in the previous step. It uses `tensor2tensor`.

 * Generate `TFRecords` for training
 ```
-$ export MOUNT_DATA_DIR=/path/to/data/folder # (optional) mount a local data directory
-$ export DOCKER_ENTRYPOINT=t2t-datagen # (required)
-$ ./language_task/run.sh --problem=github_function_summarizer
+$ export MOUNT_DATA_DIR=/path/to/data/folder
+$ docker run --rm -it -v ${MOUNT_DATA_DIR}:/data ${BUILD_IMAGE_TAG} \
+    t2t-datagen --problem=github_function_summarizer --data_dir=/data
 ```

 * Train transduction model using `Transformer Networks` and a base hyper-parameter set
 ```
-$ export MOUNT_DATA_DIR=/path/to/data/folder # (optional) mount a local data directory
-$ export DOCKER_ENTRYPOINT=t2t-trainer # (required)
-$ ./language_task/run.sh --problem=github_function_summarizer --model=transformer --hparams_set=transformer_base
+$ export MOUNT_DATA_DIR=/path/to/data/folder
+$ export MOUNT_OUTPUT_DIR=/path/to/output/folder
+$ docker run --rm -it -v ${MOUNT_DATA_DIR}:/data -v ${MOUNT_OUTPUT_DIR}:/output ${BUILD_IMAGE_TAG} \
+    t2t-trainer --problem=github_function_summarizer --data_dir=/data --output_dir=/output \
+    --model=transformer --hparams_set=transformer_base
 ```

-### 2.2 Docstrings Language Model
+#### 2.2.2 Docstrings Language Model
 This part trains a language model based on the docstrings in the dataset and uses `tensor2tensor`.

 * Generate `TFRecords` for training
 ```
-$ export MOUNT_DATA_DIR=/path/to/data/folder # (optional) mount a local data directory
-$ export DOCKER_ENTRYPOINT=t2t-datagen # (required)
-$ ./language_task/run.sh --problem=github_docstring_language_model
+$ export MOUNT_DATA_DIR=/path/to/data/folder
+$ docker run --rm -it -v ${MOUNT_DATA_DIR}:/data ${BUILD_IMAGE_TAG} \
+    t2t-datagen --problem=github_docstring_language_model --data_dir=/data
 ```

 * Train language model using `Transformer Networks` and a custom hyper-parameter set
 ```
-$ export MOUNT_DATA_DIR=/path/to/data/folder # (optional) mount a local data directory
-$ export MOUNT_OUTPUT_DIR=/path/to/output/folder # (optional) mount a local output directory
-$ export DOCKER_ENTRYPOINT=t2t-trainer # (required)
-$ ./language_task/run.sh --problem=github_docstring_language_model --model=transformer --hparams_set=transformer_gh_lm
+$ export MOUNT_DATA_DIR=/path/to/data/folder
+$ export MOUNT_OUTPUT_DIR=/path/to/output/folder
+$ docker run --rm -it -v ${MOUNT_DATA_DIR}:/data -v ${MOUNT_OUTPUT_DIR}:/output ${BUILD_IMAGE_TAG} \
+    t2t-trainer --problem=github_docstring_language_model --data_dir=/data --output_dir=/output \
+    --model=transformer --hparams_set=transformer_gh_lm
 ```

+### 2.3 Train on Kubeflow
+
+TODO
+
 # Acknowledgements

 This project derives from [hamelsmu/code_search](https://github.com/hamelsmu/code_search).
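Note: section `2.3 Train on Kubeflow` above is still a TODO in this patch. As a rough sketch only, launching the `t2t-gh-summarizer` component added below could look like the following (this assumes a cluster with Kubeflow's TFJob operator installed and the `default` environment from `app.yaml`; the GCS bucket is a placeholder):

```
$ cd code_search/kubeflow
$ ks param set t2t-gh-summarizer dataDir gs://<your-bucket>/code-search/raw_data
$ ks param set t2t-gh-summarizer outputDir gs://<your-bucket>/code-search/train
$ ks apply default -c t2t-gh-summarizer
```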
diff --git a/code_search/kubeflow/.gitignore b/code_search/kubeflow/.gitignore
new file mode 100644
index 00000000..0e218529
--- /dev/null
+++ b/code_search/kubeflow/.gitignore
@@ -0,0 +1,5 @@
+/lib
+/.ksonnet/registries
+/app.override.yaml
+/.ks_environment
+/environments
diff --git a/code_search/kubeflow/app.yaml b/code_search/kubeflow/app.yaml
new file mode 100644
index 00000000..e59f772c
--- /dev/null
+++ b/code_search/kubeflow/app.yaml
@@ -0,0 +1,31 @@
+apiVersion: 0.1.0
+environments:
+  default:
+    destination:
+      namespace: kubeflow
+      server: https://130.211.225.204
+    k8sVersion: v1.9.6
+    path: default
+kind: ksonnet.io/app
+libraries:
+  tf-job:
+    gitVersion:
+      commitSha: d8e19a4762406bb454453331f52ed5a4433c0df9
+      refSpec: master
+    name: tf-job
+    registry: kubeflow
+name: kubeflow
+registries:
+  incubator:
+    gitVersion:
+      commitSha: 40285d8a14f1ac5787e405e1023cf0c07f6aa28c
+      refSpec: master
+    protocol: github
+    uri: github.com/ksonnet/parts/tree/master/incubator
+  kubeflow:
+    gitVersion:
+      commitSha: d8e19a4762406bb454453331f52ed5a4433c0df9
+      refSpec: master
+    protocol: github
+    uri: github.com/kubeflow/kubeflow/tree/master/kubeflow
+version: 0.0.1
diff --git a/code_search/kubeflow/components/params.libsonnet b/code_search/kubeflow/components/params.libsonnet
new file mode 100644
index 00000000..324a48cc
--- /dev/null
+++ b/code_search/kubeflow/components/params.libsonnet
@@ -0,0 +1,33 @@
+{
+  global: {
+    // User-defined global parameters; accessible to all component and environments, Ex:
+    // replicas: 4,
+  },
+  components: {
+    // Component-level parameters, defined initially from 'ks prototype use ...'
+    // Each object below should correspond to a component in the components/ directory
+    "t2t-job": {
+      numWorker: 1,
+      numMaster: 1,
+      numPs: 1,
+      numWorkerGpu: 0,
+      numPsGpu: 0,
+
+      train_steps: 100,
+      eval_steps: 10,
+
+      image: "gcr.io/kubeflow-dev/code-search:devel",
+      imageGpu: "gcr.io/kubeflow-dev/code-search:gpu-devel",
+      imagePullSecrets: [],
+    },
+
+    "t2t-gh-summarizer": {
+      "name": "github_function_summarizer",
+      "problem": "github_function_summarizer",
+      "dataDir": "gs://kubeflow-dev/code-search/raw_data",
+      "outputDir": "gs://kubeflow-dev/code-search/train",
+      "model": "transformer",
+      "hparams_set": "transformer_base"
+    },
+  },
+}
diff --git a/code_search/kubeflow/components/t2t-gh-summarizer.jsonnet b/code_search/kubeflow/components/t2t-gh-summarizer.jsonnet
new file mode 100644
index 00000000..bcbc8132
--- /dev/null
+++ b/code_search/kubeflow/components/t2t-gh-summarizer.jsonnet
@@ -0,0 +1,7 @@
+local k = import "k.libsonnet";
+local t2tJob = import "t2t-job.libsonnet";
+
+local env = std.extVar("__ksonnet/environments");
+local params = std.extVar("__ksonnet/params").components["t2t-gh-summarizer"];
+
+std.prune(k.core.v1.list.new([t2tJob.parts(params, env).job]))
diff --git a/code_search/kubeflow/components/t2t-job.libsonnet b/code_search/kubeflow/components/t2t-job.libsonnet
new file mode 100644
index 00000000..23d58f85
--- /dev/null
+++ b/code_search/kubeflow/components/t2t-job.libsonnet
@@ -0,0 +1,67 @@
+local tfJob = import "kubeflow/tf-job/tf-job.libsonnet";
+local baseParams = std.extVar("__ksonnet/params").components["t2t-job"];
+
+{
+  parts(newParams, env):: {
+    local params = baseParams + newParams,
+
+    local t2tCmd = {
+      datagen: [
+        "t2t-datagen",
+        "--problem=" + params.problem,
+        "--data_dir=" + params.dataDir,
+      ],
+
+      trainer: [
+        "t2t-trainer",
+        "--problem=" + params.problem,
+        "--data_dir=" + params.dataDir,
+        "--output_dir=" + params.outputDir,
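+        // Numeric params (train_steps, eval_steps, GPU and replica counts) are rendered into flag strings with std.toString below.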
"--model=" + params.model, + "--hparams_set=" + params.hparams_set, + "--train_steps=" + std.toString(params.train_steps), + ], + + workerBase: self.trainer + [ + "--schedule=train", + "--ps_gpu=" + std.toString(params.numPsGpu), + "--worker_gpu=" + std.toString(params.numWorkerGpu), + "--worker_replicas=" + std.toString(params.numWorker + params.numMaster), + "--ps_replicas=" + std.toString(params.numPs), + "--eval_steps=" + std.toString(params.eval_steps), + ], + + ps: self.trainer + [ + "--schedule=run_std_server", + "--ps_job=/job:ps", + ], + + worker: self.workerBase + [ + "--worker_job=/job:worker", + ], + + master: self.workerBase + [ + "--worker_job=/job:master", + ], + }, + + local terminationPolicy = if params.numMaster == 1 + then tfJob.parts.tfJobTerminationPolicy("MASTER", 0) + else tfJob.parts.tfJobTerminationPolicy("WORKER", 0), + + local workerImage = if params.numWorkerGpu > 0 then params.imageGpu else params.image, + local psImage = if params.numPsGpu > 0 then params.imageGpu else params.image, + + job:: + tfJob.parts.tfJob( + params.name, + env.namespace, + [ + tfJob.parts.tfJobReplica("MASTER", params.numMaster, t2tCmd.master, workerImage, params.imagePullSecrets, params.numWorkerGpu), + tfJob.parts.tfJobReplica("WORKER", params.numWorker, t2tCmd.worker, workerImage, params.imagePullSecrets, params.numWorkerGpu), + tfJob.parts.tfJobReplica("PS", params.numPs, t2tCmd.ps, psImage, params.imagePullSecrets, params.numPsGpu), + ], + terminationPolicy + ), + }, +} diff --git a/code_search/kubeflow/environments/base.libsonnet b/code_search/kubeflow/environments/base.libsonnet new file mode 100644 index 00000000..a129affb --- /dev/null +++ b/code_search/kubeflow/environments/base.libsonnet @@ -0,0 +1,4 @@ +local components = std.extVar("__ksonnet/components"); +components + { + // Insert user-specified overrides here. +} diff --git a/code_search/kubeflow/vendor/kubeflow/tf-job/README.md b/code_search/kubeflow/vendor/kubeflow/tf-job/README.md new file mode 100644 index 00000000..9825b264 --- /dev/null +++ b/code_search/kubeflow/vendor/kubeflow/tf-job/README.md @@ -0,0 +1,91 @@ + + +**Table of Contents** *generated with [DocToc](https://github.com/thlorenz/doctoc)* + +- [tf-job](#tf-job) + - [Quickstart](#quickstart) + - [Using the library](#using-the-library) + - [io.ksonnet.pkg.tf-job](#ioksonnetpkgtf-job) + - [Example](#example) + - [Parameters](#parameters) + - [Example](#example-1) + - [Parameters](#parameters-1) + + + +# tf-job + +> Prototypes for running TensorFlow jobs. + + +* [Quickstart](#quickstart) +* [Using Prototypes](#using-prototypes) + * [io.ksonnet.pkg.tf-job](#io.ksonnet.pkg.tf-job) + * [io.ksonnet.pkg.tf-cnn](#io.ksonnet.pkg.tf-cnn) + +## Quickstart + +*The following commands use the `io.ksonnet.pkg.tf-job` prototype to generate Kubernetes YAML for tf-job, and then deploys it to your Kubernetes cluster.* + +First, create a cluster and install the ksonnet CLI (see root-level [README.md](rootReadme)). + +If you haven't yet created a [ksonnet application](linkToSomewhere), do so using `ks init `. + +Finally, in the ksonnet application directory, run the following: + +```shell +# Expand prototype as a Jsonnet file, place in a file in the +# `components/` directory. (YAML and JSON are also available.) +$ ks prototype use io.ksonnet.pkg.tf-job tf-job \ + --namespace default \ + --name tf-job + +# Apply to server. 
+$ ks apply -f tf-job.jsonnet
+```
+
+## Using the library
+
+The library files for tf-job define a set of relevant *parts* (_e.g._, deployments, services, secrets, and so on) that can be combined to configure tf-job for a wide variety of scenarios. For example, a database like Redis may need a secret to hold the user password, or it may have no password if it's acting as a cache.
+
+This library provides a set of pre-fabricated "flavors" (or "distributions") of tf-job, each of which is configured for a different use case. These are captured as ksonnet *prototypes*, which allow users to interactively customize these distributions for their specific needs.
+
+These prototypes, as well as how to use them, are enumerated below.
+
+### io.ksonnet.pkg.tf-job
+
+A TensorFlow job (could be training or evaluation).
+#### Example
+
+```shell
+# Expand prototype as a Jsonnet file, place in a file in the
+# `components/` directory. (YAML and JSON are also available.)
+$ ks prototype use io.ksonnet.pkg.tf-job tf-job \
+  --name YOUR_NAME_HERE
+```
+
+#### Parameters
+
+The available options to pass to the prototype are:
+
+* `--name=<name>`: Name to give to each of the components [string]
+### io.ksonnet.pkg.tf-cnn
+
+A TensorFlow CNN Benchmarking job.
+#### Example
+
+```shell
+# Expand prototype as a Jsonnet file, place in a file in the
+# `components/` directory. (YAML and JSON are also available.)
+$ ks prototype use io.ksonnet.pkg.tf-cnn tf-job \
+  --name YOUR_NAME_HERE
+```
+
+#### Parameters
+
+The available options to pass to the prototype are:
+
+* `--name=<name>`: Name for the job. [string]
+
+
+[rootReadme]: https://github.com/ksonnet/mixins
diff --git a/code_search/kubeflow/vendor/kubeflow/tf-job/parts.yaml b/code_search/kubeflow/vendor/kubeflow/tf-job/parts.yaml
new file mode 100644
index 00000000..905ff0a9
--- /dev/null
+++ b/code_search/kubeflow/vendor/kubeflow/tf-job/parts.yaml
@@ -0,0 +1,35 @@
+{
+  "name": "tf-job",
+  "apiVersion": "0.0.1",
+  "kind": "ksonnet.io/parts",
+  "description": "Prototypes for running TensorFlow jobs.\n",
+  "author": "kubeflow team",
+  "contributors": [
+    {
+      "name": "Jeremy Lewi",
+      "email": "jlewi@google.com"
+    }
+  ],
+  "repository": {
+    "type": "git",
+    "url": "https://github.com/kubeflow/kubeflow"
+  },
+  "bugs": {
+    "url": "https://github.com/kubeflow/kubeflow/issues"
+  },
+  "keywords": [
+    "kubeflow",
+    "tensorflow",
+    "database"
+  ],
+  "quickStart": {
+    "prototype": "io.ksonnet.pkg.tf-job",
+    "componentName": "tf-job",
+    "flags": {
+      "name": "tf-job",
+      "namespace": "default"
+    },
+    "comment": "Run TensorFlow Job"
+  },
+  "license": "Apache 2.0"
+}
diff --git a/code_search/kubeflow/vendor/kubeflow/tf-job/prototypes/tf-job.jsonnet b/code_search/kubeflow/vendor/kubeflow/tf-job/prototypes/tf-job.jsonnet
new file mode 100644
index 00000000..f3e5e68b
--- /dev/null
+++ b/code_search/kubeflow/vendor/kubeflow/tf-job/prototypes/tf-job.jsonnet
@@ -0,0 +1,65 @@
+// @apiVersion 0.1
+// @name io.ksonnet.pkg.tf-job
+// @description A TensorFlow job (could be training or evaluation).
+// @shortDescription A TensorFlow job.
+// @param name string Name to give to each of the components
+// @optionalParam namespace string null Namespace to use for the components. It is automatically inherited from the environment if not set.
+// @optionalParam args string null Comma separated list of arguments to pass to the job
+// @optionalParam image string null The docker image to use for the job.
+// @optionalParam image_gpu string null The docker image to use when using GPUs.
+// @optionalParam image_pull_secrets string null Comma-delimited list of secret names to use credentials in pulling your docker images.
+// @optionalParam num_masters number 1 The number of masters to use
+// @optionalParam num_ps number 0 The number of ps to use
+// @optionalParam num_workers number 0 The number of workers to use
+// @optionalParam num_gpus number 0 The number of GPUs to attach to workers.
+
+// TODO(https://github.com/ksonnet/ksonnet/issues/235): ks param set args won't work if the arg starts with "--".
+
+local k = import "k.libsonnet";
+local tfJob = import "kubeflow/tf-job/tf-job.libsonnet";
+// updatedParams uses the environment namespace if
+// the namespace parameter is not explicitly set
+local updatedParams = params {
+  namespace: if params.namespace == "null" then env.namespace else params.namespace,
+};
+
+local name = import "param://name";
+local namespace = updatedParams.namespace;
+
+local argsParam = import "param://args";
+local args =
+  if argsParam == "null" then
+    []
+  else
+    std.split(argsParam, ",");
+
+local image = import "param://image";
+local imageGpu = import "param://image_gpu";
+local imagePullSecrets = import "param://image_pull_secrets";
+local numMasters = import "param://num_masters";
+local numPs = import "param://num_ps";
+local numWorkers = import "param://num_workers";
+local numGpus = import "param://num_gpus";
+
+local terminationPolicy = if numMasters == 1 then
+  tfJob.parts.tfJobTerminationPolicy("MASTER", 0)
+else
+  tfJob.parts.tfJobTerminationPolicy("WORKER", 0);
+
+local workerSpec = if numGpus > 0 then
+  tfJob.parts.tfJobReplica("WORKER", numWorkers, args, imageGpu, imagePullSecrets, numGpus)
+else
+  tfJob.parts.tfJobReplica("WORKER", numWorkers, args, image, imagePullSecrets);
+
+std.prune(k.core.v1.list.new([
+  tfJob.parts.tfJob(
+    name,
+    namespace,
+    [
+      tfJob.parts.tfJobReplica("MASTER", numMasters, args, image, imagePullSecrets),
+      workerSpec,
+      tfJob.parts.tfJobReplica("PS", numPs, args, image, imagePullSecrets),
+    ],
+    terminationPolicy
+  ),
+]))
diff --git a/code_search/kubeflow/vendor/kubeflow/tf-job/tf-job.libsonnet b/code_search/kubeflow/vendor/kubeflow/tf-job/tf-job.libsonnet
new file mode 100644
index 00000000..2ac2f744
--- /dev/null
+++ b/code_search/kubeflow/vendor/kubeflow/tf-job/tf-job.libsonnet
@@ -0,0 +1,59 @@
+local k = import "k.libsonnet";
+local util = import "util.libsonnet";
+
+{
+  parts:: {
+    tfJobReplica(replicaType, number, args, image, imagePullSecrets=[], numGpus=0)::
+      local baseContainer = {
+        image: image,
+        name: "tensorflow",
+      };
+      local containerArgs = if std.length(args) > 0 then
+        {
+          args: args,
+        }
+      else {};
+      local resources = if numGpus > 0 then {
+        resources: {
+          limits: {
+            "nvidia.com/gpu": numGpus,
+          },
+        },
+      } else {};
+      if number > 0 then
+        {
+          replicas: number,
+          template: {
+            spec: {
+              imagePullSecrets: [{ name: secret } for secret in util.toArray(imagePullSecrets)],
+              containers: [
+                baseContainer + containerArgs + resources,
+              ],
+              restartPolicy: "OnFailure",
+            },
+          },
+          tfReplicaType: replicaType,
+        }
+      else {},
+
+    tfJobTerminationPolicy(replicaName, replicaIndex):: {
+      chief: {
+        replicaName: replicaName,
+        replicaIndex: replicaIndex,
+      },
+    },
+
+    tfJob(name, namespace, replicas, tp):: {
+      apiVersion: "kubeflow.org/v1alpha1",
+      kind: "TFJob",
+      metadata: {
+        name: name,
+        namespace: namespace,
+      },
+      spec: {
+        replicaSpecs: replicas,
+        terminationPolicy: tp,
+      },
+    },
+  },
+}
diff --git a/code_search/kubeflow/vendor/kubeflow/tf-job/util.libsonnet b/code_search/kubeflow/vendor/kubeflow/tf-job/util.libsonnet
new file mode 100644
index 00000000..d5458f40
--- /dev/null
+++ b/code_search/kubeflow/vendor/kubeflow/tf-job/util.libsonnet
@@ -0,0 +1,7 @@
+{
+  // Convert a comma-delimited string to an array.
+  toArray(str)::
+    if std.type(str) == "string" && str != "null" && std.length(str) > 0 then
+      std.split(str, ",")
+    else [],
+}
diff --git a/code_search/language_task/Dockerfile b/code_search/language_task/Dockerfile
index 489e8802..a97a0722 100644
--- a/code_search/language_task/Dockerfile
+++ b/code_search/language_task/Dockerfile
@@ -6,8 +6,15 @@ FROM tensorflow/tensorflow:$BASE_IMAGE_TAG

 ADD requirements.txt /

-RUN pip3 --no-cache-dir install -r /requirements.txt
+RUN pip3 --no-cache-dir install -r /requirements.txt &&\
+    apt-get update && apt-get install -y jq &&\
+    rm -rf /var/lib/apt/lists/*

 VOLUME ["/data", "/output"]

 ADD t2t_problems/* /t2t_problems/
+ADD t2t-entrypoint.sh /usr/local/sbin/t2t-entrypoint
+
+ENV T2T_USR_DIR=/t2t_problems
+
+ENTRYPOINT ["/usr/local/sbin/t2t-entrypoint"]
diff --git a/code_search/language_task/build_image.sh b/code_search/language_task/build_image.sh
index b0458e46..d7a29ced 100755
--- a/code_search/language_task/build_image.sh
+++ b/code_search/language_task/build_image.sh
@@ -2,8 +2,9 @@

 set -e

+PROJECT=${PROJECT:-}
 BASE_IMAGE_TAG=${BASE_IMAGE_TAG:-1.8.0-py3} # 1.8.0-gpu-py3 for GPU-based image
-BUILD_IMAGE_TAG=${BUILD_IMAGE_TAG:-semantic-code-search:devel}
+BUILD_IMAGE_TAG=${BUILD_IMAGE_TAG:-code-search:devel}

 # Directory of this script used as docker context
 _SCRIPT_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )"
@@ -12,4 +13,10 @@ pushd "$_SCRIPT_DIR"

 docker build -t ${BUILD_IMAGE_TAG} --build-arg BASE_IMAGE_TAG=${BASE_IMAGE_TAG} .

+# Push image to GCR if PROJECT is set
+if [[ ! -z "${PROJECT}" ]]; then
+  docker tag ${BUILD_IMAGE_TAG} gcr.io/${PROJECT}/${BUILD_IMAGE_TAG}
+  docker push gcr.io/${PROJECT}/${BUILD_IMAGE_TAG}
+fi
+
 popd
diff --git a/code_search/language_task/run.sh b/code_search/language_task/run.sh
deleted file mode 100755
index eafadc87..00000000
--- a/code_search/language_task/run.sh
+++ /dev/null
@@ -1,36 +0,0 @@
-#!/usr/bin/env bash
-
-set -e
-
-# Script Variables
-IMAGE_TAG=${IMAGE_TAG:-semantic-code-search:devel}
-DOCKER_ENTRYPOINT=${DOCKER_ENTRYPOINT:-}
-
-MOUNT_DATA_DIR=${MOUNT_DATA_DIR:-}
-MOUNT_OUTPUT_DIR=${MOUNT_OUTPUT_DIR:-}
-
-DATA_DIR=${DATA_DIR:-/data}
-OUTPUT_DIR=${OUTPUT_DIR:-/output}
-
-# Internal Variables
-_DOCKER_RUN_OPTS="-it --rm --entrypoint=${DOCKER_ENTRYPOINT}"
-_DOCKER_CMD="${@} --t2t_usr_dir=/t2t_problems --tmp_dir=/tmp --data_dir=${DATA_DIR} --output_dir=${OUTPUT_DIR}"
-
-if [[ -z ${DOCKER_ENTRYPOINT} ]]; then
-  echo "ERROR: Missing DOCKER_ENTRYPOINT environment variable! Use 't2t-datagen' or 't2t-trainer'"
-  exit 1
-fi
-
-# Mount local directories (if specified)
-if [[ ! -z ${MOUNT_DATA_DIR} ]]; then
-  _DOCKER_RUN_OPTS="${_DOCKER_RUN_OPTS} -v ${MOUNT_DATA_DIR}:${DATA_DIR}:rw"
-fi
-
-if [[ ! -z ${MOUNT_OUTPUT_DIR} ]]; then
-  _DOCKER_RUN_OPTS="${_DOCKER_RUN_OPTS} -v ${MOUNT_OUTPUT_DIR}:${OUTPUT_DIR}:rw"
-fi
-
-_FINAL_CMD="docker run ${_DOCKER_RUN_OPTS} ${IMAGE_TAG} ${_DOCKER_CMD}"
-
-echo "${_FINAL_CMD}"
-eval "${_FINAL_CMD}"
diff --git a/code_search/language_task/t2t-entrypoint.sh b/code_search/language_task/t2t-entrypoint.sh
new file mode 100755
index 00000000..c501d7a6
--- /dev/null
+++ b/code_search/language_task/t2t-entrypoint.sh
@@ -0,0 +1,29 @@
+#!/usr/bin/env bash
+
+set -e
+
+T2T_USR_DIR=${T2T_USR_DIR:-}
+TARGET_BIN="${1}"
+TARGET_BIN_OPTS="--tmp_dir=/tmp"
+
+# Add T2T user directory for new problems
+if [[ ! -z "${T2T_USR_DIR}" ]]; then
+  TARGET_BIN_OPTS="${TARGET_BIN_OPTS} --t2t_usr_dir=${T2T_USR_DIR}"
+fi
+
+# Process TF_CONFIG to pass distributed training parameters to `t2t-trainer`
+TF_CONFIG=${TF_CONFIG:-}
+if [[ ! -z "${TF_CONFIG}" ]]; then
+  WORKER_ID=$(echo "${TF_CONFIG}" | jq ".task.index")
+  WORKER_TYPE=$(echo "${TF_CONFIG}" | jq -r ".task.type")
+  MASTER_INSTANCE=$(echo "${TF_CONFIG}" | jq -r ".cluster.${WORKER_TYPE}[${WORKER_ID}]")
+
+  if [[ "${TARGET_BIN}" = "t2t-trainer" ]]; then
+    TARGET_BIN_OPTS="${TARGET_BIN_OPTS} --master=grpc://${MASTER_INSTANCE} --worker_id=${WORKER_ID}"
+  fi
+fi
+
+EVAL_CMD="${TARGET_BIN} ${TARGET_BIN_OPTS} ${@:2}"
+
+echo "Running command: '${EVAL_CMD}'"
+eval "${EVAL_CMD}"
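Note: for reference, a minimal sketch of how the new `t2t-entrypoint.sh` consumes `TF_CONFIG` inside the container image built above (the real value is injected into each pod by the TFJob operator; the host names and ports below are illustrative only):

```
$ export TF_CONFIG='{"cluster":{"master":["tfjob-master-0:2222"],"ps":["tfjob-ps-0:2222"],"worker":["tfjob-worker-0:2222"]},"task":{"type":"worker","index":0}}'
$ t2t-entrypoint t2t-trainer --problem=github_function_summarizer
# With T2T_USR_DIR=/t2t_problems (set in the Dockerfile), jq extracts
# WORKER_TYPE=worker, WORKER_ID=0 and MASTER_INSTANCE=tfjob-worker-0:2222,
# so the script evaluates:
#   t2t-trainer --tmp_dir=/tmp --t2t_usr_dir=/t2t_problems \
#     --master=grpc://tfjob-worker-0:2222 --worker_id=0 \
#     --problem=github_function_summarizer
```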