mirror of https://github.com/kubeflow/examples.git
Update ksonnet for datagen (#309)
* Update the datagen component. * We should use a K8s job rather than a TFJob. We can also simplify the ksonnet by just putting the spec into the jsonnet file rather than trying to share various bits of the spec with the TFJob for training. * Related to kubeflow/examples#308: use globals to allow parameters to be shared across components (e.g. the working directory). * Update the README with information about data. * Fix table markdown.
This commit is contained in:
parent
11879e2ff1
commit
d01b76b6f9
|
@ -3,4 +3,32 @@
|
|||
This directory contains assets for setting up a demo of the code search example.
|
||||
It is primarily intended for use by Kubeflow contributors working on the shared demo.
|
||||
|
||||
Users looking to run the example should follow the README.md in the parent directory.
|
||||
Users looking to run the example should follow the README.md in the parent directory.
|
||||
|
||||
# GCP Resources
|
||||
|
||||
We are using the following project
|
||||
|
||||
* **org**: kubeflow.org
|
||||
* **project**: code-search-demo
|
||||
* **[code-search-team@kubeflow.org](https://github.com/kubeflow/internal-acls/blob/master/code-search-team.members.txt)** Google group administering access
|
||||
|
||||
# Results
|
||||
|
||||
## 2018-11-05
|
||||
|
||||
jlewi@ ran experiments that produced the following results
|
||||
|
||||
| What | Location | Description
|
||||
|------|----------|-------------------------
|
||||
| Preprocessed data | gs://code-search-demo/20181104/data/func-doc-pairs-00???-of-00100.csv | This is the output of the Dataflow preprocessing job
|
||||
| Training data | gs://code-search-demo/20181104/data/kf_github_function_docstring-train-00???-of-00100 | TFRecord files produced by running T2T datagen
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
|
|
@ -35,6 +35,7 @@
|
|||
// Parameters for the T2T datagen component.
"t2t-code-search-datagen": {
  name: "t2t-code-search-datagen",
  jobType: "datagen",
  // The container image is taken from the t2t-job component's parameters.
  image: $.components["t2t-job"].image,
  // The problem name and the data location are derived from the
  // t2t-code-search component; data lives under its working directory.
  dataDir: $.components["t2t-code-search"].workingDir + "/data",
  problem: $.components["t2t-code-search"].problem,
},
|
||||
|
|
|
@ -1,7 +1,66 @@
|
|||
// A K8s job to run datagen using T2T.
|
||||
local k = import "k.libsonnet";
|
||||
local t2tJob = import "t2t-job.libsonnet";
|
||||
|
||||
local env = std.extVar("__ksonnet/environments");
|
||||
local params = std.extVar("__ksonnet/params").components["t2t-code-search-datagen"];
|
||||
|
||||
std.prune(k.core.v1.list.new([t2tJob.parts(params, env).job]))
|
||||
// Kubernetes batch/v1 Job that runs the T2T datagen step.
// `params` (component parameters) and `env` (ksonnet environment) are the
// locals bound earlier in this file.
local jobSpec = {
  apiVersion: "batch/v1",
  kind: "Job",
  metadata: {
    name: params.name,
    namespace: env.namespace,
    labels: {
      app: params.name,
    },
  },
  spec: {
    // NOTE: `replicas` is deliberately not set here. It is not a field of a
    // batch/v1 JobSpec (it belongs to TFJob replica specs); a Job's
    // parallelism/completions already default to a single pod.
    template: {
      metadata: {
        labels: {
          app: params.name,
        },
      },
      spec: {
        restartPolicy: "OnFailure",
        containers: [
          {
            name: "t2t-datagen",
            image: params.image,
            command: [
              "/usr/local/sbin/t2t-entrypoint",
              "t2t-datagen",
              "--problem=" + params.problem,
              "--data_dir=" + params.dataDir,
            ],
            env: [
              {
                // Path lies under the gcp-credentials volume mount below.
                name: "GOOGLE_APPLICATION_CREDENTIALS",
                value: "/secret/gcp-credentials/user-gcp-sa.json",
              },
            ],
            workingDir: "/src",
            volumeMounts: [
              {
                mountPath: "/secret/gcp-credentials",
                name: "gcp-credentials",
              },
            ],  //volumeMounts
          },
        ],  // containers
        volumes: [
          {
            // Backed by the user-gcp-sa secret in the job's namespace.
            name: "gcp-credentials",
            secret: {
              secretName: "user-gcp-sa",
            },
          },
        ],
      },  // spec
    },
  },
};

// Emit the job wrapped in a v1 List, with empty/null fields pruned.
std.prune(k.core.v1.list.new([jobSpec]))
|
||||
|
|
|
@ -1,14 +1,6 @@
|
|||
local baseParams = std.extVar("__ksonnet/params").components["t2t-job"];
|
||||
|
||||
{
|
||||
// Returns the container command (as a list of strings) for the T2T datagen
// step. Reads `params.problem` and `params.dataDir`.
getDatagenCmd(params)::
  local entrypoint = ["/usr/local/sbin/t2t-entrypoint", "t2t-datagen"];
  local flags = [
    "--problem=" + params.problem,
    "--data_dir=" + params.dataDir,
  ];
  entrypoint + flags,
|
||||
|
||||
{
|
||||
getExporterCmd(params)::
|
||||
[
|
||||
"/usr/local/sbin/t2t-entrypoint",
|
||||
|
@ -104,7 +96,6 @@ local baseParams = std.extVar("__ksonnet/params").components["t2t-job"];
|
|||
|
||||
local cmd = $.getTrainerCmd(params),
|
||||
local workerCmd = if params.jobType == "exporter" then $.getExporterCmd(params)
|
||||
else if params.jobType == "datagen" then $.getDatagenCmd(params)
|
||||
else cmd.worker,
|
||||
|
||||
job:: {
|
||||
|
|
Loading…
Reference in New Issue