Merge pull request #30 from cwbeitel/agents

Reinforcement learning example with TensorFlow Agents
This commit is contained in:
Jeremy Lewi 2018-03-09 14:59:59 -08:00 committed by GitHub
commit 0837557219
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
26 changed files with 1967 additions and 0 deletions

agents/Dockerfile Normal file (26 lines)

@@ -0,0 +1,26 @@
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
FROM tensorflow/tensorflow:1.4.1
# Needed for rendering and uploading renders
RUN apt-get update && apt-get install -y libav-tools ffmpeg git
ADD requirements.txt /app/
RUN pip install -r /app/requirements.txt
ADD trainer /app/trainer/
WORKDIR /app/
ENTRYPOINT ["python", "-m", "trainer.task"]

agents/README.md Normal file (31 lines)

@@ -0,0 +1,31 @@
# Reinforcement Learning with [tensorflow/agents](https://github.com/tensorflow/agents)
Here we provide a demonstration of training a reinforcement learning agent to perform a robotic grasping task, using Kubeflow running on Google Kubernetes Engine. In this demonstration you will learn how to parameterize a training job, submit it to run on your cluster, monitor the job (including launching a TensorBoard instance), and finally produce renders of the agent performing the robotic grasping task.
For clarity and fun, you can preview the end product of this tutorial by clicking through the render screenshot below to a short video of a trained agent performing a simulated robotic block-grasping task:
[![](doc/render_preview.png)](https://youtu.be/0X0w5XOtcHw)
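
Under the hood, the demonstration drives a small ksonnet app (see `agents/app` in this example) with `agents`, `agents_render`, and `tensorboard` components. As a rough preview of the workflow the notebook automates (this is only a sketch: it assumes you have created a ksonnet environment named `default` for your cluster, and the parameter values are illustrative):

```bash
cd agents/app
# Parameterize the training job with a unique name and log directory.
ks param set agents name my-kuka-run-0
ks param set agents log_dir gs://<your-bucket>/studies/my-study/my-kuka-run-0
# Submit the training job to the cluster.
ks apply default -c agents
# Launch a TensorBoard instance pointed at the study's logs.
ks param set tensorboard log_dir gs://<your-bucket>/studies/my-study
ks apply default -c tensorboard
```

The demonstration notebook walks through each of these steps in detail.
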
### Setup
##### GCP and Kubeflow configuration
This tutorial assumes you have deployed a Kubernetes cluster on your provider of choice and have completed the steps described in the [Kubeflow User Guide](https://github.com/kubeflow/kubeflow/blob/master/user_guide.md) to deploy the core, argo, and nfs components.
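
For reference, the ksonnet-based flow described in that guide looks roughly like the sketch below; the guide itself is authoritative for the exact package and prototype names, and the argo and nfs components are installed analogously:

```bash
# Create a ksonnet app and install the Kubeflow core package.
ks init kubeflow-deployment && cd kubeflow-deployment
ks registry add kubeflow github.com/kubeflow/kubeflow/tree/master/kubeflow
ks pkg install kubeflow/core
# Generate the core component and deploy it into the kubeflow namespace.
ks generate core kubeflow-core --name=kubeflow-core
ks env set default --namespace kubeflow
ks apply default -c kubeflow-core
```
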
##### Launching
This example is intended to be run inside the `gcr.io/kubeflow/tensorflow-notebook-cpu` container, running on JupyterHub, which in turn runs on Kubeflow. You can provide the name of this container via the spawner options dialog.
For general troubleshooting of spawning notebook containers on JupyterHub, or of anything else related to your Kubeflow deployment, please refer to the [Kubeflow User Guide](https://github.com/kubeflow/kubeflow/blob/master/user_guide.md).
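
If you have not yet connected to JupyterHub itself, one way to reach it is to port-forward the hub and browse to http://127.0.0.1:8000. This is a sketch based on the user guide; the pod name `tf-hub-0` and the namespace are assumptions about your particular deployment:

```bash
NAMESPACE=kubeflow
# Forward the JupyterHub pod's port to your workstation.
kubectl port-forward tf-hub-0 8000:8000 -n ${NAMESPACE}
```
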
Once the notebook is launched, the only remaining setup step is to use git to obtain the source code of the example, which you can do as follows (from within the tensorflow-notebook container):
```bash
cd /home/jovyan
git clone https://github.com/kubeflow/examples kubeflow-examples
```
The demonstration notebook can then be accessed at `/home/jovyan/kubeflow-examples/agents/doc/demo.ipynb`.
Well, it looks like our initial setup is finished 🎉🎉 and it's time to start playing around with that shiny new demonstration notebook!


@@ -0,0 +1,39 @@
apiVersion: "0.1"
gitVersion:
commitSha: 422d521c05aa905df949868143b26445f5e4eda5
refSpec: master
kind: ksonnet.io/registry
libraries:
apache:
path: apache
version: master
efk:
path: efk
version: master
mariadb:
path: mariadb
version: master
memcached:
path: memcached
version: master
mongodb:
path: mongodb
version: master
mysql:
path: mysql
version: master
nginx:
path: nginx
version: master
node:
path: node
version: master
postgres:
path: postgres
version: master
redis:
path: redis
version: master
tomcat:
path: tomcat
version: master


@@ -0,0 +1,18 @@
apiVersion: "0.1"
gitVersion:
commitSha: d7d859206f52168665cbc312391c67c162927e96
refSpec: master
kind: ksonnet.io/registry
libraries:
argo:
path: argo
version: master
core:
path: core
version: master
tf-job:
path: tf-job
version: master
tf-serving:
path: tf-serving
version: master

agents/app/app.yaml Normal file (30 lines)

@@ -0,0 +1,30 @@
apiVersion: 0.0.1
kind: ksonnet.io/app
libraries:
  core:
    gitVersion:
      commitSha: d7d859206f52168665cbc312391c67c162927e96
      refSpec: master
    name: core
    registry: kubeflow
  tf-job:
    gitVersion:
      commitSha: 30f913986607bb8414d51ef6a23fe60fec9afe3c
      refSpec: master
    name: tf-job
    registry: kubeflow
name: app
registries:
  incubator:
    gitVersion:
      commitSha: 422d521c05aa905df949868143b26445f5e4eda5
      refSpec: master
    protocol: github
    uri: github.com/ksonnet/parts/tree/master/incubator
  kubeflow:
    gitVersion:
      commitSha: 30f913986607bb8414d51ef6a23fe60fec9afe3c
      refSpec: master
    protocol: github
    uri: github.com/kubeflow/kubeflow/tree/master/kubeflow
version: 0.0.1


@@ -0,0 +1,98 @@
local params = std.extVar("__ksonnet/params").components["agents"];
local k = import 'k.libsonnet';
local deployment = k.extensions.v1beta1.deployment;
local container = deployment.mixin.spec.template.spec.containersType;
local podTemplate = k.extensions.v1beta1.podTemplate;
local tfJob = import 'kubeflow/tf-job/tf-job.libsonnet';
local name = params.name;
local namespace = params.namespace;
local num_gpus = params.num_gpus;
local hparam_set_id = params.hparam_set_id;
local jobTag = params.job_tag;
local image = params.image;
local imageGpu = params.image_gpu;
local numCpu = params.num_cpu;
local dumpDependencyVersions = params.dump_dependency_versions;
local log_dir = params.log_dir;
local hparamSetID = params.hparam_set_id;
local runBaseTag = params.run_base_tag;
local syncReplicas = params.sync_replicas;
local algorithm = params.algorithm;
local numAgents = params.num_agents;
local evalEpisodes = params.eval_episodes;
local env = params.env;
local maxLength = params.max_length;
local steps = params.steps;
local network = params.network;
local initMeanFactor = params.init_mean_factor;
local learningRate = params.learning_rate;
local optimizer = params.optimizer;
local updateEpochs = params.update_epochs;
local updateEvery = params.update_every;
local discount = params.discount;
local klTarget = params.kl_target;
local klCutoffFactor = params.kl_cutoff_factor;
local klCutoffCoef = params.kl_cutoff_coef;
local klInitPenalty = params.kl_init_penalty;
local renderSecs = params.render_secs;
local args = [
  "--run_mode=train",
  "--logdir=" + log_dir,
  "--hparam_set_id=" + hparamSetID,
  "--run_base_tag=" + runBaseTag,
  "--sync_replicas=" + syncReplicas,
  "--num_gpus=" + num_gpus,
  "--algorithm=" + algorithm,
  "--num_agents=" + numAgents,
  "--eval_episodes=" + evalEpisodes,
  "--env=" + env,
  "--max_length=" + maxLength,
  "--steps=" + steps,
  "--network=" + network,
  "--init_mean_factor=" + initMeanFactor,
  "--learning_rate=" + learningRate,
  "--optimizer=" + optimizer,
  "--update_epochs=" + updateEpochs,
  "--update_every=" + updateEvery,
  "--discount=" + discount,
  "--kl_target=" + klTarget,
  "--kl_cutoff_factor=" + klCutoffFactor,
  "--kl_cutoff_coef=" + klCutoffCoef,
  "--kl_init_penalty=" + klInitPenalty,
  "--dump_dependency_versions=" + dumpDependencyVersions,
  "--render_secs=" + renderSecs,
];

local workerSpec = if num_gpus > 0 then
  tfJob.parts.tfJobReplica("MASTER", 1, args, imageGpu, num_gpus)
else
  tfJob.parts.tfJobReplica("MASTER", 1, args, image);

local replicas = std.map(
  function(s)
    s + {
      template+: {
        spec+: {
          containers: [
            s.template.spec.containers[0] + {
              resources: {
                limits: {
                  cpu: numCpu,
                },
                requests: {
                  cpu: numCpu,
                },
              },
            },
          ],
        },
      },
    },
  std.prune([workerSpec]));

local job = tfJob.parts.tfJob(name, namespace, replicas);

std.prune(k.core.v1.list.new([job]))


@@ -0,0 +1,51 @@
local params = std.extVar("__ksonnet/params").components["agents_render"];
local k = import 'k.libsonnet';
local deployment = k.extensions.v1beta1.deployment;
local container = deployment.mixin.spec.template.spec.containersType;
local podTemplate = k.extensions.v1beta1.podTemplate;
local tfJob = import 'kubeflow/tf-job/tf-job.libsonnet';
local name = params.name;
local namespace = params.namespace;
local num_gpus = params.num_gpus;
local log_dir = params.log_dir;
local imageGpu = "";
local image = params.image;
local numCpu = params.num_cpu;
local args = [
  "--run_mode=render",
  "--logdir=" + log_dir,
  "--num_agents=1",
];

local workerSpec = if num_gpus > 0 then
  tfJob.parts.tfJobReplica("MASTER", 1, args, imageGpu, num_gpus)
else
  tfJob.parts.tfJobReplica("MASTER", 1, args, image);

local replicas = std.map(
  function(s)
    s + {
      template+: {
        spec+: {
          containers: [
            s.template.spec.containers[0] + {
              resources: {
                limits: {
                  cpu: numCpu,
                },
                requests: {
                  cpu: numCpu,
                },
              },
            },
          ],
        },
      },
    },
  std.prune([workerSpec]));

local job = tfJob.parts.tfJob(name, namespace, replicas);

std.prune(k.core.v1.list.new([job]))


@@ -0,0 +1,65 @@
{
  global: {
    // User-defined global parameters; accessible to all components and environments, e.g.:
    // replicas: 4,
  },
  components: {
    // Component-level parameters, defined initially from 'ks prototype use ...'
    // Each object below should correspond to a component in the components/ directory
    "agents": {
      algorithm: "agents.ppo.PPOAlgorithm",
      discount: 0.995,
      dump_dependency_versions: "True",
      env: "KukaBulletEnv-v0",
      eval_episodes: 25,
      generate_data: "True",
      hparam_set_id: "pybullet_kuka_ff",
      image: "gcr.io/kubeflow-rl/agents:0221-2315-5b40",
      image_gpu: "null",
      init_mean_factor: 0.1,
      job_tag: "0206-1409-6174",
      kl_cutoff_coef: 1000,
      kl_cutoff_factor: 2,
      kl_init_penalty: 1,
      kl_target: 0.01,
      learning_rate: 0.0001,
      log_dir: "gs://kubeflow-rl/studies/replicated-kuka-demo/kuka-0221-2329-afdd",
      max_length: 1000,
      name: "kuka-0221-2329-afdd",
      namespace: "rl",
      network: "agents.scripts.networks.feed_forward_gaussian",
      num_agents: 30,
      num_cpu: 30,
      num_gpus: 0,
      num_masters: 1,
      num_ps: 1,
      num_replicas: 1,
      num_workers: 1,
      optimizer: "tensorflow.train.AdamOptimizer",
      render_secs: 600,
      run_base_tag: "0e90193e",
      run_mode: "train",
      save_checkpoint_secs: 600,
      save_checkpoints_secs: 600,
      steps: 15000000,
      sync_replicas: "False",
      update_epochs: 25,
      update_every: 60,
    },
    "agents_render": {
      image: "gcr.io/kubeflow-rl/agents:0221-1635-d869",
      log_dir: "gs://kubeflow-rl/studies/replicated-kuka-demo/kuka-0221-1650-31dc",
      name: "render-0221-1705-4149",
      namespace: "rl",
      num_cpu: 4,
      num_gpus: 0,
    },
    tensorboard: {
      log_dir: "gs://kubeflow-rl/studies/replicated-kuka-demo",
      name: "tboard-0221-2330-5c5c",
      namespace: "rl",
      secret: "gcp-credentials",
      secret_file_name: "secret.json",
    },
  },
}


@@ -0,0 +1,14 @@
// from github.com/jlewi/kubeflow-rl
local params = std.extVar("__ksonnet/params").components["tensorboard"];
local k = import 'k.libsonnet';
local tb = import "tensorboard.libsonnet";
local name = params.name;
local namespace = params.namespace;
local logDir = params.log_dir;
local secretName = params.secret;
local secretFileName = params.secret_file_name;
std.prune(k.core.v1.list.new([tb.parts(namespace, name).tbDeployment(logDir, secretName, secretFileName),
tb.parts(namespace, name).service]))


@@ -0,0 +1,82 @@
{
  parts(namespace, name):: {
    service:: {
      "apiVersion": "v1",
      "kind": "Service",
      "metadata": {
        "name": name + "-tb",
        "namespace": namespace,
      },
      "spec": {
        "ports": [
          {
            "name": "http",
            "port": 80,
            "targetPort": 80,
          },
        ],
        "selector": {
          "app": "tensorboard",
          "tb-job": name,
        },
      },
    },
    tbDeployment(logDir, secretName, secretFileName, tfImage="gcr.io/tensorflow/tensorflow:latest"):: {
      "apiVersion": "apps/v1beta1",
      "kind": "Deployment",
      "metadata": {
        "name": name + "-tb",
        "namespace": namespace,
      },
      "spec": {
        "replicas": 1,
        "template": {
          "metadata": {
            "labels": {
              "app": "tensorboard",
              "tb-job": name,
            },
            "name": name,
            "namespace": namespace,
          },
          "spec": {
            "containers": [
              {
                "command": [
                  "/usr/local/bin/tensorboard",
                  "--logdir=" + logDir,
                  "--port=80",
                ],
                "image": tfImage,
                "name": "tensorboard",
                "ports": [
                  {
                    "containerPort": 80,
                  },
                ],
                "env": [
                  {
                    "name": "GOOGLE_APPLICATION_CREDENTIALS",
                    "value": "/secret/gcp-credentials/" + secretFileName,
                  },
                ],
                "volumeMounts": [
                  {
                    "name": "credentials",
                    "mountPath": "/secret/gcp-credentials",
                  },
                ],
              },
            ],
            "volumes": [
              {
                "name": "credentials",
                "secret": {
                  "secretName": secretName,
                },
              },
            ],
          },
        },
      },
    },
  },
}


@@ -0,0 +1,4 @@
local components = std.extVar("__ksonnet/components");
components + {
  // Insert user-specified overrides here.
}

BIN agents/doc/builder.png Normal file (binary image, 620 KiB; not shown)

agents/doc/demo.ipynb Normal file (1047 lines; diff suppressed because one or more lines are too long)

BIN agents/doc/jhub-spawn.png Normal file (binary image, 81 KiB; not shown)

BIN agents/doc/render.mp4 Normal file (binary; not shown)
BIN (binary image, 1.3 MiB; filename not shown in this view)

BIN agents/doc/sa-create.png Normal file (binary image, 89 KiB; not shown)

BIN (binary image, 472 KiB; filename not shown in this view)

agents/requirements.txt Normal file (5 lines)

@@ -0,0 +1,5 @@
-e git://github.com/tensorflow/agents.git@459c4f88ece996eac3489e6e97a6ee0b30bdd6b3#egg=agents
pybullet==1.7.5
gym==0.9.4
tensorflow==1.4.1
google-cloud-storage==1.7.0


@@ -0,0 +1,22 @@
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
FROM docker:17.10
RUN apk update
RUN apk add --no-cache curl python bash
RUN curl https://sdk.cloud.google.com | bash
RUN ln -s /root/google-cloud-sdk/bin/gsutil /usr/bin/gsutil
RUN ln -s /root/google-cloud-sdk/bin/gcloud /usr/bin/gcloud


@@ -0,0 +1,23 @@
# Container builder
[![Docker Repository on Quay](https://quay.io/repository/cwbeitel/builder/status "Docker Repository on Quay")](https://quay.io/repository/cwbeitel/builder)
A custom container builder image can be built in the standard way, e.g.
```bash
YOUR_BUILDER_IMAGE_TAG=quay.io/someuser/builder:0.1
docker build -t ${YOUR_BUILDER_IMAGE_TAG} .
gcloud docker -- push ${YOUR_BUILDER_IMAGE_TAG}
```
It can then be specified as an argument to the [container builder workflow](../config/builder.yaml) as follows:
```bash
argo submit config/builder.yaml --namespace kubeflow \
--parameter builder-image=${YOUR_BUILDER_IMAGE_TAG}
...
```
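
In addition to `builder-image`, [builder.yaml](../config/builder.yaml) declares `bundle`, `app-name`, `image-tag`, and `project` parameters, so a complete submission looks roughly like the following (all of the values here are illustrative):

```bash
argo submit config/builder.yaml --namespace kubeflow \
  --parameter bundle=gs://<your-project>-builder/agents/agents.tgz \
  --parameter app-name=agents \
  --parameter image-tag=gcr.io/<your-project>/agents:0.1 \
  --parameter project=<your-project> \
  --parameter builder-image=${YOUR_BUILDER_IMAGE_TAG}
```
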
Please refer to the [demonstration notebook](../doc/demo.ipynb) for more details.


@@ -0,0 +1,37 @@
apiVersion: argoproj.io/v1alpha1
kind: Workflow
metadata:
  generateName: kubeflow-builder-
spec:
  entrypoint: workflow
  arguments:
    parameters:
    - name: bundle
      value: unspecified
    - name: app-name
      value: unspecified
    - name: image-tag
      value: unspecified
    - name: project
      value: unspecified
    - name: builder-image
      value: quay.io/cwbeitel/builder:0.1
  templates:
  - name: workflow
    steps:
    - - name: build-push
        template: build-and-push
  - name: build-and-push
    container:
      image: "{{workflow.parameters.builder-image}}"
      command: [bash, -c]
      args: ["mkdir -p /build; gsutil cp {{workflow.parameters.bundle}} /build/ && cd /build && tar -xzvf {{workflow.parameters.app-name}}.tgz; cd {{workflow.parameters.app-name}}; until docker ps; do sleep 3; done; docker build -t {{workflow.parameters.image-tag}} .; gcloud docker -- push {{workflow.parameters.image-tag}}; docker save {{workflow.parameters.image-tag}} $(docker history -q {{workflow.parameters.image-tag}}) > /build/cache.tar; gsutil cp /build/cache.tar gs://{{workflow.parameters.project}}-builder/{{workflow.parameters.app-name}}/cache/"]
      env:
      - name: DOCKER_HOST  # The Docker daemon can be accessed on the standard port on localhost.
        value: 127.0.0.1
    sidecars:
    - name: dind
      image: docker:17.10-dind  # Docker already provides an image for running a Docker daemon.
      securityContext:
        privileged: true  # The Docker daemon can only run in a privileged container.
      mirrorVolumeMounts: true


@@ -0,0 +1,29 @@
#!/usr/bin/env bash
#
# Copyright 2017 The Kubeflow Examples Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Perform a git commit after having removed the default ksonnet env
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
cd ${SCRIPT_DIR}/../../app
ks env rm default
cd ..
git add *
git commit


@@ -0,0 +1,47 @@
#!/usr/bin/env bash
#
# Copyright 2017 The Kubeflow Examples Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Build and release demo container
RELEASE_VERSION=0.0.1
APP_TAG=agents-demo
RELEASE_TYPE=${RELEASE_TYPE:-dirty}
if [ -z "${RELEASE_REGISTRY+x}" ]; then
  echo "Please specify a target registry for the release by setting RELEASE_REGISTRY."
  exit 1
fi

IMAGE_TAG=${RELEASE_REGISTRY}/${APP_TAG}:${RELEASE_VERSION}

if [ "${RELEASE_TYPE}" == "dirty" ]; then
  echo "Building dirty release."
  SALT=$(python -c 'import datetime; import uuid; now=datetime.datetime.now(); print(now.strftime("%m%d-%H%M") + "-" + uuid.uuid4().hex[0:4])')
  IMAGE_TAG=${IMAGE_TAG}-${SALT}
fi
echo "Building release with tag: ${IMAGE_TAG}"
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
APP_DIR=${SCRIPT_DIR}/../../
cd ${APP_DIR}
docker build -t ${IMAGE_TAG} -f doc/Dockerfile .
gcloud docker -- push ${IMAGE_TAG}
#docker push ${IMAGE_TAG}


@@ -0,0 +1,19 @@
# Copyright 2017 The TensorFlow Agents Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Proximal Policy Optimization algorithm."""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

agents/trainer/task.py Normal file (280 lines)

@@ -0,0 +1,280 @@
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Provides an entrypoint for the training task."""
from __future__ import absolute_import, division, print_function
import argparse
import datetime
import logging
import os
import pprint
import uuid
import pip
import tensorflow as tf
from google.cloud import storage
import agents
import pybullet_envs # To make AntBulletEnv-v0 available.
flags = tf.app.flags
flags.DEFINE_string("run_mode", "train",
"Run mode, one of [train, render, train_and_render].")
flags.DEFINE_string("logdir", '/tmp/test',
"The base directory in which to write logs and "
"checkpoints.")
flags.DEFINE_string("hparam_set_id", "pybullet_kuka_ff",
"The name of the config object to be used to parameterize "
"the run.")
flags.DEFINE_string("run_base_tag",
datetime.datetime.now().strftime('%Y%m%dT%H%M%S'),
"Base tag to prepend to logs dir folder name. Defaults "
"to timestamp.")
flags.DEFINE_boolean("env_processes", True,
"Step environments in separate processes to circumvent "
"the GIL.")
flags.DEFINE_integer("num_gpus", 0,
"Total number of gpus for each machine."
"If you don't use GPU, please set it to '0'")
flags.DEFINE_integer("save_checkpoint_secs", 600,
"Number of seconds between checkpoint save.")
flags.DEFINE_boolean("log_device_placement", False,
"Whether to output logs listing the devices on which "
"variables are placed.")
flags.DEFINE_boolean("debug", True,
"Run in debug mode.")
# Render
flags.DEFINE_integer("render_secs", 600,
"Number of seconds between triggering render jobs.")
flags.DEFINE_string("render_out_dir", None,
"The path to which to copy generated renders.")
# Algorithm
flags.DEFINE_string("algorithm", "agents.ppo.PPOAlgorithm",
"The name of the algorithm to use.")
flags.DEFINE_integer("num_agents", 30,
"The number of agents to use.")
flags.DEFINE_integer("eval_episodes", 25,
"The number of eval episodes to use.")
flags.DEFINE_string("env", "AntBulletEnv-v0",
"The gym / bullet simulation environment to use.")
flags.DEFINE_integer("max_length", 1000,
"The maximum length of an episode.")
flags.DEFINE_integer("steps", 10000000,
"The number of steps.")
# Network
flags.DEFINE_string("network", "agents.scripts.networks.feed_forward_gaussian",
"The registered network name to use for policy and value.")
flags.DEFINE_float("init_mean_factor", 0.1,
"")
flags.DEFINE_float("init_std", 0.35,
"")
# Optimization
flags.DEFINE_float("learning_rate", 1e-4,
"The learning rate of the optimizer.")
flags.DEFINE_string("optimizer", "tensorflow.train.AdamOptimizer",
"The import path of the optimizer to use.")
flags.DEFINE_integer("update_epochs", 25,
"The number of update epochs.")
flags.DEFINE_integer("update_every", 60,
"The update frequency.")
# Losses
flags.DEFINE_float("discount", 0.995,
"The discount.")
flags.DEFINE_float("kl_target", 1e-2,
"the KL target.")
flags.DEFINE_integer("kl_cutoff_factor", 2,
"The KL cutoff factor.")
flags.DEFINE_integer("kl_cutoff_coef", 1000,
"The KL cutoff coefficient.")
flags.DEFINE_integer("kl_init_penalty", 1,
"The initial KL penalty?.")
FLAGS = flags.FLAGS

def hparams_base():
  """Base hparams for TF Agents PPO."""
  # General
  algorithm = agents.ppo.PPOAlgorithm
  num_agents = 30
  eval_episodes = 30
  use_gpu = False
  # Environment
  env = 'KukaBulletEnv-v0'
  normalize_ranges = True
  max_length = 1000
  # Network
  network = agents.scripts.networks.feed_forward_gaussian
  weight_summaries = dict(
      all=r'.*', policy=r'.*/policy/.*', value=r'.*/value/.*')
  policy_layers = 200, 100
  value_layers = 200, 100
  init_output_factor = 0.1
  init_logstd = -1
  init_std = 0.35
  # Optimization
  update_every = 60
  update_epochs = 25
  optimizer = tf.train.AdamOptimizer
  learning_rate = 1e-4
  steps = 3e7  # 30M
  # Losses
  discount = 0.995
  kl_target = 1e-2
  kl_cutoff_factor = 2
  kl_cutoff_coef = 1000
  kl_init_penalty = 1
  return locals()

def _object_import_from_string(name):
  """Import and return the object at the dotted import path `name`."""
  components = name.split('.')
  mod = __import__(components[0])
  for comp in components[1:]:
    mod = getattr(mod, comp)
  return mod


def _realize_import_attrs(d, filter):
  """Replace string import paths in `d` (for keys in `filter`) with objects."""
  for k, v in d.items():
    if k in filter:
      try:
        imported = _object_import_from_string(v)
      except (ImportError, AttributeError):
        raise ImportError("Failed to realize import path %s." % v)
      d[k] = imported
  return d

def _get_agents_configuration(hparam_set_name, log_dir=None, is_chief=False):
  """Load hyperparameter config."""
  try:
    # Try to resume training.
    hparams = agents.scripts.utility.load_config(log_dir)
  except IOError:
    hparams = hparams_base()
    # --------
    # Experiment extending base hparams with FLAGS and dynamic import of
    # network and algorithm.
    for k, v in FLAGS.__dict__['__flags'].items():
      hparams[k] = v
    hparams = _realize_import_attrs(
        hparams, ["network", "algorithm", "optimizer"])
    # --------
    hparams = agents.tools.AttrDict(hparams)
    hparams = agents.scripts.utility.save_config(hparams, log_dir)
  pprint.pprint(hparams)
  return hparams

def gcs_upload(local_dir, gcs_out_dir):
  """Upload the contents of a local directory to a specific GCS path.

  Args:
    local_dir (str): The local directory containing files to upload.
    gcs_out_dir (str): The target Google Cloud Storage directory path.

  Raises:
    ValueError: If `gcs_out_dir` does not start with "gs://".
  """
  # Get a list of all files in local_dir.
  local_files = [f for f in os.listdir(local_dir)
                 if os.path.isfile(os.path.join(local_dir, f))]
  tf.logging.info("Preparing local files for upload:\n %s" % local_files)

  # Initialize the GCS API client.
  storage_client = storage.Client()

  # Raise an error if the target directory is not a GCS path.
  if not gcs_out_dir.startswith("gs://"):
    raise ValueError(
        "gcs_upload expected gcs_out_dir argument to start with gs://, "
        "saw %s" % gcs_out_dir)

  # TODO: Detect and handle the case where a GCS path has been provided
  # corresponding to a bucket that does not exist or for which the user does
  # not have permissions.

  # Obtain the bucket name from the full path.
  bucket_path = gcs_out_dir.split('/')[2]
  bucket = storage_client.get_bucket(bucket_path)

  # Construct a target upload path that excludes the initial gs://bucket-name.
  blob_base_path = '/'.join(gcs_out_dir.split('/')[3:])

  # For each local file *name* in the list of local file names:
  for local_filename in local_files:
    # Construct the target and local *paths*.
    blob_path = os.path.join(blob_base_path, local_filename)
    blob = bucket.blob(blob_path)
    local_file_path = os.path.join(local_dir, local_filename)
    # Perform the upload operation.
    blob.upload_from_filename(local_file_path)

def main(unused_argv):
  """Run training."""
  tf.logging.set_verbosity(tf.logging.INFO)
  if FLAGS.debug:
    tf.logging.set_verbosity(tf.logging.DEBUG)

  run_config = tf.contrib.learn.RunConfig()
  log_dir = FLAGS.logdir

  agents_config = _get_agents_configuration(
      FLAGS.hparam_set_id, log_dir, run_config.is_chief)

  if FLAGS.run_mode == 'train':
    for score in agents.scripts.train.train(agents_config, env_processes=True):
      logging.info('Score {}.'.format(score))

  if FLAGS.run_mode == 'render':
    now = datetime.datetime.now()
    subdir = now.strftime("%m%d-%H%M") + "-" + uuid.uuid4().hex[0:4]
    render_tmp_dir = "/tmp/agents-render/"
    os.system('mkdir -p %s' % render_tmp_dir)
    agents.scripts.visualize.visualize(
        logdir=FLAGS.logdir, outdir=render_tmp_dir, num_agents=1,
        num_episodes=1, checkpoint=None, env_processes=True)
    render_out_dir = FLAGS.render_out_dir
    # Unless a render out dir is specified explicitly, upload to a unique
    # subdir of the log dir under render/.
    if render_out_dir is None:
      render_out_dir = os.path.join(FLAGS.logdir, "render", subdir)
    gcs_upload(render_tmp_dir, render_out_dir)


if __name__ == '__main__':
  tf.app.run()