Merge pull request #30 from cwbeitel/agents

Reinforcement learning example with TensorFlow Agents
This commit is contained in:
Jeremy Lewi 2018-03-09 14:59:59 -08:00 committed by GitHub
commit 0837557219
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
26 changed files with 1967 additions and 0 deletions

agents/Dockerfile Normal file (26 lines)

@@ -0,0 +1,26 @@
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
FROM tensorflow/tensorflow:1.4.1
# Needed for rendering and uploading renders
RUN apt-get update && apt-get install -y libav-tools ffmpeg git
ADD requirements.txt /app/
RUN pip install -r /app/requirements.txt
ADD trainer /app/trainer/
WORKDIR /app/
ENTRYPOINT ["python", "-m", "trainer.task"]

agents/README.md Normal file (31 lines)

@@ -0,0 +1,31 @@
# Reinforcement Learning with [tensorflow/agents](https://github.com/tensorflow/agents)
Here we provide a demonstration of training a reinforcement learning agent to perform a robotic grasping task, using Kubeflow running on Google Kubernetes Engine. In this demonstration you will learn how to parameterize a training job, submit it to run on your cluster, monitor the job (including launching a TensorBoard instance), and finally produce renders of the agent performing the robotic grasping task.
For clarity and fun, you can preview the end product of this tutorial by clicking through the render screenshot below to a short video of a trained agent performing a simulated robotic block-grasping task:
[![](doc/render_preview.png)](https://youtu.be/0X0w5XOtcHw)
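
Under the hood, the demonstration drives a small ksonnet app (see `agents/app` in this example) with `agents`, `agents_render`, and `tensorboard` components. As a rough preview of the workflow the notebook automates (this is only a sketch: it assumes you have created a ksonnet environment named `default` for your cluster, and the parameter values are illustrative):

```bash
cd agents/app
# Parameterize the training job with a unique name and log directory.
ks param set agents name my-kuka-run-0
ks param set agents log_dir gs://<your-bucket>/studies/my-study/my-kuka-run-0
# Submit the training job to the cluster.
ks apply default -c agents
# Launch a TensorBoard instance pointed at the study's logs.
ks param set tensorboard log_dir gs://<your-bucket>/studies/my-study
ks apply default -c tensorboard
```

The demonstration notebook walks through each of these steps in detail.
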
### Setup
##### GCP and Kubeflow configuration
This tutorial assumes you have deployed a Kubernetes cluster on your provider of choice and have completed the steps described in the [Kubeflow User Guide](https://github.com/kubeflow/kubeflow/blob/master/user_guide.md) to deploy the core, argo, and nfs components.
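
For reference, the ksonnet-based flow described in that guide looks roughly like the sketch below; the guide itself is authoritative for the exact package and prototype names, and the argo and nfs components are installed analogously:

```bash
# Create a ksonnet app and install the Kubeflow core package.
ks init kubeflow-deployment && cd kubeflow-deployment
ks registry add kubeflow github.com/kubeflow/kubeflow/tree/master/kubeflow
ks pkg install kubeflow/core
# Generate the core component and deploy it into the kubeflow namespace.
ks generate core kubeflow-core --name=kubeflow-core
ks env set default --namespace kubeflow
ks apply default -c kubeflow-core
```
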
##### Launching
This example is intended to be run inside the `gcr.io/kubeflow/tensorflow-notebook-cpu` container, running on JupyterHub, which in turn runs on Kubeflow. You can provide the name of this container via the spawner options dialog.
For general troubleshooting of spawning notebook containers on JupyterHub, or of anything else related to your Kubeflow deployment, please refer to the [Kubeflow User Guide](https://github.com/kubeflow/kubeflow/blob/master/user_guide.md).
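
If you have not yet connected to JupyterHub itself, one way to reach it is to port-forward the hub and browse to http://127.0.0.1:8000. This is a sketch based on the user guide; the pod name `tf-hub-0` and the namespace are assumptions about your particular deployment:

```bash
NAMESPACE=kubeflow
# Forward the JupyterHub pod's port to your workstation.
kubectl port-forward tf-hub-0 8000:8000 -n ${NAMESPACE}
```
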
Once the notebook is launched, the only remaining setup step is to use git to obtain the source code of the example, which you can do as follows (from within the tensorflow-notebook container):
```bash
cd /home/jovyan
git clone https://github.com/kubeflow/examples kubeflow-examples
```
The demonstration notebook can then be accessed at `/home/jovyan/kubeflow-examples/agents/doc/demo.ipynb`.
Well, it looks like our initial setup is finished 🎉🎉 and it's time to start playing around with that shiny new demonstration notebook!


@@ -0,0 +1,39 @@
apiVersion: "0.1"
gitVersion:
commitSha: 422d521c05aa905df949868143b26445f5e4eda5
refSpec: master
kind: ksonnet.io/registry
libraries:
apache:
path: apache
version: master
efk:
path: efk
version: master
mariadb:
path: mariadb
version: master
memcached:
path: memcached
version: master
mongodb:
path: mongodb
version: master
mysql:
path: mysql
version: master
nginx:
path: nginx
version: master
node:
path: node
version: master
postgres:
path: postgres
version: master
redis:
path: redis
version: master
tomcat:
path: tomcat
version: master


@@ -0,0 +1,18 @@
apiVersion: "0.1"
gitVersion:
commitSha: d7d859206f52168665cbc312391c67c162927e96
refSpec: master
kind: ksonnet.io/registry
libraries:
argo:
path: argo
version: master
core:
path: core
version: master
tf-job:
path: tf-job
version: master
tf-serving:
path: tf-serving
version: master

agents/app/app.yaml Normal file (30 lines)

@@ -0,0 +1,30 @@
apiVersion: 0.0.1
kind: ksonnet.io/app
libraries:
  core:
    gitVersion:
      commitSha: d7d859206f52168665cbc312391c67c162927e96
      refSpec: master
    name: core
    registry: kubeflow
  tf-job:
    gitVersion:
      commitSha: 30f913986607bb8414d51ef6a23fe60fec9afe3c
      refSpec: master
    name: tf-job
    registry: kubeflow
name: app
registries:
  incubator:
    gitVersion:
      commitSha: 422d521c05aa905df949868143b26445f5e4eda5
      refSpec: master
    protocol: github
    uri: github.com/ksonnet/parts/tree/master/incubator
  kubeflow:
    gitVersion:
      commitSha: 30f913986607bb8414d51ef6a23fe60fec9afe3c
      refSpec: master
    protocol: github
    uri: github.com/kubeflow/kubeflow/tree/master/kubeflow
version: 0.0.1


@@ -0,0 +1,98 @@
local params = std.extVar("__ksonnet/params").components["agents"];
local k = import 'k.libsonnet';
local deployment = k.extensions.v1beta1.deployment;
local container = deployment.mixin.spec.template.spec.containersType;
local podTemplate = k.extensions.v1beta1.podTemplate;
local tfJob = import 'kubeflow/tf-job/tf-job.libsonnet';
local name = params.name;
local namespace = params.namespace;
local num_gpus = params.num_gpus;
local hparam_set_id = params.hparam_set_id;
local jobTag = params.job_tag;
local image = params.image;
local imageGpu = params.image_gpu;
local numCpu = params.num_cpu;
local dumpDependencyVersions = params.dump_dependency_versions;
local log_dir = params.log_dir;
local hparamSetID = params.hparam_set_id;
local runBaseTag = params.run_base_tag;
local syncReplicas = params.sync_replicas;
local algorithm = params.algorithm;
local numAgents = params.num_agents;
local evalEpisodes = params.eval_episodes;
local env = params.env;
local maxLength = params.max_length;
local steps = params.steps;
local network = params.network;
local initMeanFactor = params.init_mean_factor;
local learningRate = params.learning_rate;
local optimizer = params.optimizer;
local updateEpochs = params.update_epochs;
local updateEvery = params.update_every;
local discount = params.discount;
local klTarget = params.kl_target;
local klCutoffFactor = params.kl_cutoff_factor;
local klCutoffCoef = params.kl_cutoff_coef;
local klInitPenalty = params.kl_init_penalty;
local renderSecs = params.render_secs;
local args = [
  "--run_mode=train",
  "--logdir=" + log_dir,
  "--hparam_set_id=" + hparamSetID,
  "--run_base_tag=" + runBaseTag,
  "--sync_replicas=" + syncReplicas,
  "--num_gpus=" + num_gpus,
  "--algorithm=" + algorithm,
  "--num_agents=" + numAgents,
  "--eval_episodes=" + evalEpisodes,
  "--env=" + env,
  "--max_length=" + maxLength,
  "--steps=" + steps,
  "--network=" + network,
  "--init_mean_factor=" + initMeanFactor,
  "--learning_rate=" + learningRate,
  "--optimizer=" + optimizer,
  "--update_epochs=" + updateEpochs,
  "--update_every=" + updateEvery,
  "--discount=" + discount,
  "--kl_target=" + klTarget,
  "--kl_cutoff_factor=" + klCutoffFactor,
  "--kl_cutoff_coef=" + klCutoffCoef,
  "--kl_init_penalty=" + klInitPenalty,
  "--dump_dependency_versions=" + dumpDependencyVersions,
  "--render_secs=" + renderSecs,
];

local workerSpec = if num_gpus > 0 then
  tfJob.parts.tfJobReplica("MASTER", 1, args, imageGpu, num_gpus)
else
  tfJob.parts.tfJobReplica("MASTER", 1, args, image);

local replicas = std.map(
  function(s)
    s + {
      template+: {
        spec+: {
          containers: [
            s.template.spec.containers[0] + {
              resources: {
                limits: {
                  cpu: numCpu,
                },
                requests: {
                  cpu: numCpu,
                },
              },
            },
          ],
        },
      },
    },
  std.prune([workerSpec]));

local job = tfJob.parts.tfJob(name, namespace, replicas);

std.prune(k.core.v1.list.new([job]))


@@ -0,0 +1,51 @@
local params = std.extVar("__ksonnet/params").components["agents_render"];
local k = import 'k.libsonnet';
local deployment = k.extensions.v1beta1.deployment;
local container = deployment.mixin.spec.template.spec.containersType;
local podTemplate = k.extensions.v1beta1.podTemplate;
local tfJob = import 'kubeflow/tf-job/tf-job.libsonnet';
local name = params.name;
local namespace = params.namespace;
local num_gpus = params.num_gpus;
local log_dir = params.log_dir;
local imageGpu = "";
local image = params.image;
local numCpu = params.num_cpu;
local args = [
  "--run_mode=render",
  "--logdir=" + log_dir,
  "--num_agents=1",
];

local workerSpec = if num_gpus > 0 then
  tfJob.parts.tfJobReplica("MASTER", 1, args, imageGpu, num_gpus)
else
  tfJob.parts.tfJobReplica("MASTER", 1, args, image);

local replicas = std.map(
  function(s)
    s + {
      template+: {
        spec+: {
          containers: [
            s.template.spec.containers[0] + {
              resources: {
                limits: {
                  cpu: numCpu,
                },
                requests: {
                  cpu: numCpu,
                },
              },
            },
          ],
        },
      },
    },
  std.prune([workerSpec]));

local job = tfJob.parts.tfJob(name, namespace, replicas);

std.prune(k.core.v1.list.new([job]))


@@ -0,0 +1,65 @@
{
  global: {
    // User-defined global parameters; accessible to all components and environments, e.g.:
    // replicas: 4,
  },
  components: {
    // Component-level parameters, defined initially from 'ks prototype use ...'
    // Each object below should correspond to a component in the components/ directory
    "agents": {
      algorithm: "agents.ppo.PPOAlgorithm",
      discount: 0.995,
      dump_dependency_versions: "True",
      env: "KukaBulletEnv-v0",
      eval_episodes: 25,
      generate_data: "True",
      hparam_set_id: "pybullet_kuka_ff",
      image: "gcr.io/kubeflow-rl/agents:0221-2315-5b40",
      image_gpu: "null",
      init_mean_factor: 0.1,
      job_tag: "0206-1409-6174",
      kl_cutoff_coef: 1000,
      kl_cutoff_factor: 2,
      kl_init_penalty: 1,
      kl_target: 0.01,
      learning_rate: 0.0001,
      log_dir: "gs://kubeflow-rl/studies/replicated-kuka-demo/kuka-0221-2329-afdd",
      max_length: 1000,
      name: "kuka-0221-2329-afdd",
      namespace: "rl",
      network: "agents.scripts.networks.feed_forward_gaussian",
      num_agents: 30,
      num_cpu: 30,
      num_gpus: 0,
      num_masters: 1,
      num_ps: 1,
      num_replicas: 1,
      num_workers: 1,
      optimizer: "tensorflow.train.AdamOptimizer",
      render_secs: 600,
      run_base_tag: "0e90193e",
      run_mode: "train",
      save_checkpoint_secs: 600,
      save_checkpoints_secs: 600,
      steps: 15000000,
      sync_replicas: "False",
      update_epochs: 25,
      update_every: 60,
    },
    "agents_render": {
      image: "gcr.io/kubeflow-rl/agents:0221-1635-d869",
      log_dir: "gs://kubeflow-rl/studies/replicated-kuka-demo/kuka-0221-1650-31dc",
      name: "render-0221-1705-4149",
      namespace: "rl",
      num_cpu: 4,
      num_gpus: 0,
    },
    tensorboard: {
      log_dir: "gs://kubeflow-rl/studies/replicated-kuka-demo",
      name: "tboard-0221-2330-5c5c",
      namespace: "rl",
      secret: "gcp-credentials",
      secret_file_name: "secret.json",
    },
  },
}


@@ -0,0 +1,14 @@
// from github.com/jlewi/kubeflow-rl
local params = std.extVar("__ksonnet/params").components["tensorboard"];
local k = import 'k.libsonnet';
local tb = import "tensorboard.libsonnet";
local name = params.name;
local namespace = params.namespace;
local logDir = params.log_dir;
local secretName = params.secret;
local secretFileName = params.secret_file_name;
std.prune(k.core.v1.list.new([tb.parts(namespace, name).tbDeployment(logDir, secretName, secretFileName),
tb.parts(namespace, name).service]))


@@ -0,0 +1,82 @@
{
  parts(namespace, name):: {
    service:: {
      "apiVersion": "v1",
      "kind": "Service",
      "metadata": {
        "name": name + "-tb",
        "namespace": namespace,
      },
      "spec": {
        "ports": [
          {
            "name": "http",
            "port": 80,
            "targetPort": 80,
          },
        ],
        "selector": {
          "app": "tensorboard",
          "tb-job": name,
        },
      },
    },
    tbDeployment(logDir, secretName, secretFileName, tfImage="gcr.io/tensorflow/tensorflow:latest"):: {
      "apiVersion": "apps/v1beta1",
      "kind": "Deployment",
      "metadata": {
        "name": name + "-tb",
        "namespace": namespace,
      },
      "spec": {
        "replicas": 1,
        "template": {
          "metadata": {
            "labels": {
              "app": "tensorboard",
              "tb-job": name,
            },
            "name": name,
            "namespace": namespace,
          },
          "spec": {
            "containers": [
              {
                "command": [
                  "/usr/local/bin/tensorboard",
                  "--logdir=" + logDir,
                  "--port=80",
                ],
                "image": tfImage,
                "name": "tensorboard",
                "ports": [
                  {
                    "containerPort": 80,
                  },
                ],
                "env": [
                  {
                    "name": "GOOGLE_APPLICATION_CREDENTIALS",
                    "value": "/secret/gcp-credentials/" + secretFileName,
                  },
                ],
                "volumeMounts": [
                  {
                    "name": "credentials",
                    "mountPath": "/secret/gcp-credentials",
                  },
                ],
              },
            ],
            "volumes": [
              {
                "name": "credentials",
                "secret": {
                  "secretName": secretName,
                },
              },
            ],
          },
        },
      },
    },
  },
}


@@ -0,0 +1,4 @@
local components = std.extVar("__ksonnet/components");
components + {
  // Insert user-specified overrides here.
}

BIN agents/doc/builder.png Normal file (binary image, 620 KiB; not shown)

agents/doc/demo.ipynb Normal file (1047 lines; diff suppressed because one or more lines are too long)

BIN agents/doc/jhub-spawn.png Normal file (binary image, 81 KiB; not shown)

BIN agents/doc/render.mp4 Normal file (binary; not shown)
BIN (binary image, 1.3 MiB; filename not shown in this view)

BIN agents/doc/sa-create.png Normal file (binary image, 89 KiB; not shown)

BIN (binary image, 472 KiB; filename not shown in this view)

agents/requirements.txt Normal file (5 lines)

@@ -0,0 +1,5 @@
-e git://github.com/tensorflow/agents.git@459c4f88ece996eac3489e6e97a6ee0b30bdd6b3#egg=agents
pybullet==1.7.5
gym==0.9.4
tensorflow==1.4.1
google-cloud-storage==1.7.0


@@ -0,0 +1,22 @@
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
FROM docker:17.10
RUN apk update
RUN apk add --no-cache curl python bash
RUN curl https://sdk.cloud.google.com | bash
RUN ln -s /root/google-cloud-sdk/bin/gsutil /usr/bin/gsutil
RUN ln -s /root/google-cloud-sdk/bin/gcloud /usr/bin/gcloud


@@ -0,0 +1,23 @@
# Container builder
[![Docker Repository on Quay](https://quay.io/repository/cwbeitel/builder/status "Docker Repository on Quay")](https://quay.io/repository/cwbeitel/builder)
A custom container builder image can be built in the standard way, e.g.
```bash
YOUR_BUILDER_IMAGE_TAG=quay.io/someuser/builder:0.1
docker build -t ${YOUR_BUILDER_IMAGE_TAG} .
gcloud docker -- push ${YOUR_BUILDER_IMAGE_TAG}
```
It can then be specified as an argument to the [container builder workflow](../config/builder.yaml) as follows:
```bash
argo submit config/builder.yaml --namespace kubeflow \
--parameter builder-image=${YOUR_BUILDER_IMAGE_TAG}
...
```
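
In addition to `builder-image`, [builder.yaml](../config/builder.yaml) declares `bundle`, `app-name`, `image-tag`, and `project` parameters, so a complete submission looks roughly like the following (all of the values here are illustrative):

```bash
argo submit config/builder.yaml --namespace kubeflow \
  --parameter bundle=gs://<your-project>-builder/agents/agents.tgz \
  --parameter app-name=agents \
  --parameter image-tag=gcr.io/<your-project>/agents:0.1 \
  --parameter project=<your-project> \
  --parameter builder-image=${YOUR_BUILDER_IMAGE_TAG}
```
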
Please refer to the [demonstration notebook](../doc/demo.ipynb) for more details.


@@ -0,0 +1,37 @@
apiVersion: argoproj.io/v1alpha1
kind: Workflow
metadata:
  generateName: kubeflow-builder-
spec:
  entrypoint: workflow
  arguments:
    parameters:
    - name: bundle
      value: unspecified
    - name: app-name
      value: unspecified
    - name: image-tag
      value: unspecified
    - name: project
      value: unspecified
    - name: builder-image
      value: quay.io/cwbeitel/builder:0.1
  templates:
  - name: workflow
    steps:
    - - name: build-push
        template: build-and-push
  - name: build-and-push
    container:
      image: "{{workflow.parameters.builder-image}}"
      command: [bash, -c]
      args: ["mkdir -p /build; gsutil cp {{workflow.parameters.bundle}} /build/ && cd /build && tar -xzvf {{workflow.parameters.app-name}}.tgz; cd {{workflow.parameters.app-name}}; until docker ps; do sleep 3; done; docker build -t {{workflow.parameters.image-tag}} .; gcloud docker -- push {{workflow.parameters.image-tag}}; docker save {{workflow.parameters.image-tag}} $(docker history -q {{workflow.parameters.image-tag}}) > /build/cache.tar; gsutil cp /build/cache.tar gs://{{workflow.parameters.project}}-builder/{{workflow.parameters.app-name}}/cache/"]
      env:
      - name: DOCKER_HOST  # The Docker daemon can be accessed on the standard port on localhost.
        value: 127.0.0.1
    sidecars:
    - name: dind
      image: docker:17.10-dind  # Docker already provides an image for running a Docker daemon.
      securityContext:
        privileged: true  # The Docker daemon can only run in a privileged container.
      mirrorVolumeMounts: true


@@ -0,0 +1,29 @@
#!/usr/bin/env bash
#
# Copyright 2017 The Kubeflow Examples Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Perform a git commit after having removed the default ksonnet env
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
cd ${SCRIPT_DIR}/../../app
ks env rm default
cd ..
git add *
git commit


@@ -0,0 +1,47 @@
#!/usr/bin/env bash
#
# Copyright 2017 The Kubeflow Examples Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Build and release demo container
RELEASE_VERSION=0.0.1
APP_TAG=agents-demo
RELEASE_TYPE=${RELEASE_TYPE:-dirty}
if [ -z "${RELEASE_REGISTRY+x}" ]; then
  echo "Please specify a target registry for the release by setting RELEASE_REGISTRY."
  exit 1
fi

IMAGE_TAG=${RELEASE_REGISTRY}/${APP_TAG}:${RELEASE_VERSION}

if [ "${RELEASE_TYPE}" == "dirty" ]; then
  echo "Building dirty release."
  SALT=$(python -c 'import datetime; import uuid; now=datetime.datetime.now(); print(now.strftime("%m%d-%H%M") + "-" + uuid.uuid4().hex[0:4])')
  IMAGE_TAG=${IMAGE_TAG}-${SALT}
fi
echo "Building release with tag: ${IMAGE_TAG}"
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
APP_DIR=${SCRIPT_DIR}/../../
cd ${APP_DIR}
docker build -t ${IMAGE_TAG} -f doc/Dockerfile .
gcloud docker -- push ${IMAGE_TAG}
#docker push ${IMAGE_TAG}


@@ -0,0 +1,19 @@
# Copyright 2017 The TensorFlow Agents Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Proximal Policy Optimization algorithm."""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

agents/trainer/task.py Normal file (280 lines)

@@ -0,0 +1,280 @@
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Provides an entrypoint for the training task."""
from __future__ import absolute_import, division, print_function
import argparse
import datetime
import logging
import os
import pprint
import uuid
import pip
import tensorflow as tf
from google.cloud import storage
import agents
import pybullet_envs # To make AntBulletEnv-v0 available.
flags = tf.app.flags
flags.DEFINE_string("run_mode", "train",
"Run mode, one of [train, render, train_and_render].")
flags.DEFINE_string("logdir", '/tmp/test',
"The base directory in which to write logs and "
"checkpoints.")
flags.DEFINE_string("hparam_set_id", "pybullet_kuka_ff",
"The name of the config object to be used to parameterize "
"the run.")
flags.DEFINE_string("run_base_tag",
datetime.datetime.now().strftime('%Y%m%dT%H%M%S'),
"Base tag to prepend to logs dir folder name. Defaults "
"to timestamp.")
flags.DEFINE_boolean("env_processes", True,
"Step environments in separate processes to circumvent "
"the GIL.")
flags.DEFINE_integer("num_gpus", 0,
"Total number of gpus for each machine."
"If you don't use GPU, please set it to '0'")
flags.DEFINE_integer("save_checkpoint_secs", 600,
"Number of seconds between checkpoint save.")
flags.DEFINE_boolean("log_device_placement", False,
"Whether to output logs listing the devices on which "
"variables are placed.")
flags.DEFINE_boolean("debug", True,
"Run in debug mode.")
# Render
flags.DEFINE_integer("render_secs", 600,
"Number of seconds between triggering render jobs.")
flags.DEFINE_string("render_out_dir", None,
"The path to which to copy generated renders.")
# Algorithm
flags.DEFINE_string("algorithm", "agents.ppo.PPOAlgorithm",
"The name of the algorithm to use.")
flags.DEFINE_integer("num_agents", 30,
"The number of agents to use.")
flags.DEFINE_integer("eval_episodes", 25,
"The number of eval episodes to use.")
flags.DEFINE_string("env", "AntBulletEnv-v0",
"The gym / bullet simulation environment to use.")
flags.DEFINE_integer("max_length", 1000,
"The maximum length of an episode.")
flags.DEFINE_integer("steps", 10000000,
"The number of steps.")
# Network
flags.DEFINE_string("network", "agents.scripts.networks.feed_forward_gaussian",
"The registered network name to use for policy and value.")
flags.DEFINE_float("init_mean_factor", 0.1,
"")
flags.DEFINE_float("init_std", 0.35,
"")
# Optimization
flags.DEFINE_float("learning_rate", 1e-4,
"The learning rate of the optimizer.")
flags.DEFINE_string("optimizer", "tensorflow.train.AdamOptimizer",
"The import path of the optimizer to use.")
flags.DEFINE_integer("update_epochs", 25,
"The number of update epochs.")
flags.DEFINE_integer("update_every", 60,
"The update frequency.")
# Losses
flags.DEFINE_float("discount", 0.995,
"The discount.")
flags.DEFINE_float("kl_target", 1e-2,
"the KL target.")
flags.DEFINE_integer("kl_cutoff_factor", 2,
"The KL cutoff factor.")
flags.DEFINE_integer("kl_cutoff_coef", 1000,
"The KL cutoff coefficient.")
flags.DEFINE_integer("kl_init_penalty", 1,
"The initial KL penalty?.")
FLAGS = flags.FLAGS

def hparams_base():
  """Base hparams for TF Agents PPO."""
  # General
  algorithm = agents.ppo.PPOAlgorithm
  num_agents = 30
  eval_episodes = 30
  use_gpu = False
  # Environment
  env = 'KukaBulletEnv-v0'
  normalize_ranges = True
  max_length = 1000
  # Network
  network = agents.scripts.networks.feed_forward_gaussian
  weight_summaries = dict(
      all=r'.*', policy=r'.*/policy/.*', value=r'.*/value/.*')
  policy_layers = 200, 100
  value_layers = 200, 100
  init_output_factor = 0.1
  init_logstd = -1
  init_std = 0.35
  # Optimization
  update_every = 60
  update_epochs = 25
  optimizer = tf.train.AdamOptimizer
  learning_rate = 1e-4
  steps = 3e7  # 30M
  # Losses
  discount = 0.995
  kl_target = 1e-2
  kl_cutoff_factor = 2
  kl_cutoff_coef = 1000
  kl_init_penalty = 1
  return locals()

def _object_import_from_string(name):
  """Import and return the object at the dotted import path `name`."""
  components = name.split('.')
  mod = __import__(components[0])
  for comp in components[1:]:
    mod = getattr(mod, comp)
  return mod


def _realize_import_attrs(d, filter):
  """Replace string import paths in `d` (for keys in `filter`) with objects."""
  for k, v in d.items():
    if k in filter:
      try:
        imported = _object_import_from_string(v)
      except (ImportError, AttributeError):
        raise ImportError("Failed to realize import path %s." % v)
      d[k] = imported
  return d

def _get_agents_configuration(hparam_set_name, log_dir=None, is_chief=False):
  """Load hyperparameter config."""
  try:
    # Try to resume training.
    hparams = agents.scripts.utility.load_config(log_dir)
  except IOError:
    hparams = hparams_base()
    # --------
    # Experiment extending base hparams with FLAGS and dynamic import of
    # network and algorithm.
    for k, v in FLAGS.__dict__['__flags'].items():
      hparams[k] = v
    hparams = _realize_import_attrs(
        hparams, ["network", "algorithm", "optimizer"])
    # --------
    hparams = agents.tools.AttrDict(hparams)
    hparams = agents.scripts.utility.save_config(hparams, log_dir)
  pprint.pprint(hparams)
  return hparams

def gcs_upload(local_dir, gcs_out_dir):
  """Upload the contents of a local directory to a specific GCS path.

  Args:
    local_dir (str): The local directory containing files to upload.
    gcs_out_dir (str): The target Google Cloud Storage directory path.

  Raises:
    ValueError: If `gcs_out_dir` does not start with "gs://".
  """
  # Get a list of all files in local_dir.
  local_files = [f for f in os.listdir(local_dir)
                 if os.path.isfile(os.path.join(local_dir, f))]
  tf.logging.info("Preparing local files for upload:\n %s" % local_files)

  # Initialize the GCS API client.
  storage_client = storage.Client()

  # Raise an error if the target directory is not a GCS path.
  if not gcs_out_dir.startswith("gs://"):
    raise ValueError(
        "gcs_upload expected gcs_out_dir argument to start with gs://, "
        "saw %s" % gcs_out_dir)

  # TODO: Detect and handle the case where a GCS path has been provided
  # corresponding to a bucket that does not exist or for which the user does
  # not have permissions.

  # Obtain the bucket name from the full path.
  bucket_path = gcs_out_dir.split('/')[2]
  bucket = storage_client.get_bucket(bucket_path)

  # Construct a target upload path that excludes the initial gs://bucket-name.
  blob_base_path = '/'.join(gcs_out_dir.split('/')[3:])

  # For each local file *name* in the list of local file names:
  for local_filename in local_files:
    # Construct the target and local *paths*.
    blob_path = os.path.join(blob_base_path, local_filename)
    blob = bucket.blob(blob_path)
    local_file_path = os.path.join(local_dir, local_filename)
    # Perform the upload operation.
    blob.upload_from_filename(local_file_path)

def main(unused_argv):
  """Run training."""
  tf.logging.set_verbosity(tf.logging.INFO)
  if FLAGS.debug:
    tf.logging.set_verbosity(tf.logging.DEBUG)

  run_config = tf.contrib.learn.RunConfig()
  log_dir = FLAGS.logdir

  agents_config = _get_agents_configuration(
      FLAGS.hparam_set_id, log_dir, run_config.is_chief)

  if FLAGS.run_mode == 'train':
    for score in agents.scripts.train.train(agents_config, env_processes=True):
      logging.info('Score {}.'.format(score))

  if FLAGS.run_mode == 'render':
    now = datetime.datetime.now()
    subdir = now.strftime("%m%d-%H%M") + "-" + uuid.uuid4().hex[0:4]
    render_tmp_dir = "/tmp/agents-render/"
    os.system('mkdir -p %s' % render_tmp_dir)
    agents.scripts.visualize.visualize(
        logdir=FLAGS.logdir, outdir=render_tmp_dir, num_agents=1,
        num_episodes=1, checkpoint=None, env_processes=True)
    render_out_dir = FLAGS.render_out_dir
    # Unless a render out dir is specified explicitly, upload to a unique
    # subdir of the log dir under render/.
    if render_out_dir is None:
      render_out_dir = os.path.join(FLAGS.logdir, "render", subdir)
    gcs_upload(render_tmp_dir, render_out_dir)


if __name__ == '__main__':
  tf.app.run()