Remove v1alpha1 TFJobs from the GH issue summarization example. (#264)

* We should be using v1alpha2 exclusively now.
This commit is contained in:
Jeremy Lewi 2018-10-15 09:52:01 -07:00 committed by k8s-ci-robot
parent 4ea761630d
commit 90044d24c4
6 changed files with 0 additions and 312 deletions

View File

@ -24,10 +24,6 @@
namespace: "null",
replicas: 2,
},
tensor2tensor: {
cpuImage: "gcr.io/kubeflow-examples/issue-summarization-t2t-trainer-cpu:v20180428-9da5cb7-dirty-4e1f35",
namespace: "null",
},
tensorboard: {
image: "tensorflow/tensorflow:1.7.0",
// logDir needs to be overwritten based on where the data is
@ -35,22 +31,6 @@
logDir: "",
name: "gh",
},
tfjob: {
image: "gcr.io/kubeflow-dev/tf-job-issue-summarization:v20180425-e79f888",
input_data_gcs_bucket: "kubeflow-examples",
input_data_gcs_path: "github-issue-summarization-data/github-issues.zip",
namespace: "null",
output_model_gcs_bucket: "kubeflow-examples",
output_model_gcs_path: "github-issue-summarization-data/output_model.h5",
sample_size: "100000",
},
"tfjob-pvc": {
image: "gcr.io/kubeflow-dev/tf-job-issue-summarization:v20180425-e79f888",
input_data: "/data/github_issues.csv",
namespace: "null",
output_model: "/data/model.h5",
sample_size: "2000000",
},
ui: {
namespace: "null",
githubToken: "",

View File

@ -1,7 +0,0 @@
// ksonnet component "tensor2tensor": emits the job built by
// tensor2tensor.libsonnet wrapped in a one-element list.
local k = import "k.libsonnet";
local tensor2tensor = import "tensor2tensor.libsonnet";

// Values supplied through ksonnet external variables.
local env = std.extVar("__ksonnet/environments");
local params = std.extVar("__ksonnet/params").components.tensor2tensor;

// The library exposes the manifest as the `job` field of parts(...).
local trainingJob = tensor2tensor.parts(params, env).job;

// std.prune strips null and empty members from the rendered list.
std.prune(k.core.v1.list.new([trainingJob]))

View File

@ -1,150 +0,0 @@
// Builds a tensor2tensor training job as a kubeflow.org/v1alpha1 TFJob with
// three replica specs: MASTER, WORKER and PS.
{
  // parts returns an object whose hidden `job` field holds the TFJob manifest.
  //
  // params: component parameters. Every field present here overrides the
  //   default of the same name declared in updatedParams.
  // env: environment object; only env.namespace is read.
  parts(params, env):: {
    // Defaults, overridden field-by-field by params.
    local updatedParams = {
      sync: "0",
      dataDir: "gs://kubeflow-examples-data/gh_issue_summarization/data",
      usrDir: "./github",
      problem: "github_issue_summarization_problem",
      model: "transformer_encoder",
      hparams: "transformer_github_issues",
      hparamsSet: "transformer_github_issues",
      outputGCSPath: "gs://kubecon-gh-demo/gh-t2t-out/temp",
      gpuImage: null,
      cpuImage: null,
      trainSteps: 20000,
      evalSteps: 10,
      psGpu: 0,
      workerGpu: 0,
      workers: 3,
      masters: 1,
      ps: 1,
      jobName: "tensor2tensor",
    } + params,

    // Environment variables shared by every container in the job.
    local containerEnv = [
      {
        name: "PYTHONPATH",
        value: "/home/jovyan",
      },
    ],

    // Launcher invocation common to all replica types.
    // NOTE(review): --hparams_set is fed from `hparams`; the `hparamsSet`
    // parameter declared above is never read. Left unchanged so existing
    // overrides of `hparams` keep working -- confirm which name is intended.
    local baseCommand = [
      "/home/jovyan/github/t2t_launcher.sh",
      "--train_steps=" + updatedParams.trainSteps,
      "--hparams_set=" + updatedParams.hparams,
      "--model=" + updatedParams.model,
      "--problem=" + updatedParams.problem,
      "--t2t_usr_dir=" + updatedParams.usrDir,
      "--data_dir=" + updatedParams.dataDir,
      "--output_dir=" + updatedParams.outputGCSPath,
    ],

    // Parameter servers only run the standard server schedule.
    local psCommand = baseCommand + [
      "--schedule=run_std_server",
    ],

    local totalWorkerReplicas = updatedParams.workers + updatedParams.masters,

    // Flags shared by the MASTER and WORKER replicas.
    local workerBaseCommand = baseCommand + [
      "--schedule=train",
      "--sync=" + updatedParams.sync,
      "--ps_gpu=" + updatedParams.psGpu,
      "--worker_gpu=" + updatedParams.workerGpu,
      // --worker_replicas counts the masters as well as the workers.
      "--worker_replicas=" + totalWorkerReplicas,
      "--ps_replicas=" + updatedParams.ps,
      "--eval_steps=" + updatedParams.evalSteps,
    ],
    local workerCommand = workerBaseCommand + [
      "--worker_job=/job:worker",
    ],
    local masterCommand = workerBaseCommand + [
      "--worker_job=/job:master",
    ],

    // MASTER and WORKER use the GPU image and request GPUs only when
    // workerGpu is positive; otherwise they fall back to the CPU image.
    local useGpu = updatedParams.workerGpu > 0,

    // Container spec shared by MASTER and WORKER; only the command differs.
    local trainerContainer(command) = {
      image: if useGpu then updatedParams.gpuImage else updatedParams.cpuImage,
      name: "tensorflow",
      command: command,
      env: containerEnv,
      // A null computed field name omits the field entirely.
      [if useGpu then "resources"]: {
        limits: {
          "nvidia.com/gpu": updatedParams.workerGpu,
        },
      },
    },

    // Wraps a single container into one entry of spec.replicaSpecs.
    local replicaSpec(replicaType, replicas, container) = {
      replicas: replicas,
      template: {
        spec: {
          containers: [container],
          restartPolicy: "OnFailure",
        },
      },
      tfReplicaType: replicaType,
    },

    job:: {
      apiVersion: "kubeflow.org/v1alpha1",
      kind: "TFJob",
      metadata: {
        name: updatedParams.jobName,
        namespace: env.namespace,
      },
      spec: {
        replicaSpecs: [
          // NOTE(review): the MASTER replica count is fixed at 1; the
          // `masters` parameter only contributes to --worker_replicas.
          replicaSpec("MASTER", 1, trainerContainer(masterCommand)),
          replicaSpec("WORKER", updatedParams.workers, trainerContainer(workerCommand)),
          // Parameter servers always run the CPU image, without GPU limits.
          replicaSpec("PS", updatedParams.ps, {
            image: updatedParams.cpuImage,
            name: "tensorflow",
            command: psCommand,
            env: containerEnv,
          }),
        ],
        terminationPolicy: {
          chief: {
            replicaIndex: 0,
            replicaName: "MASTER",
          },
        },
      },
    },  // job
  },  // parts
}

View File

@ -1,61 +0,0 @@
// ksonnet component "tfjob-pvc": trains the model, reading and writing its
// data through a PersistentVolumeClaim mounted at /data.
local env = std.extVar("__ksonnet/environments");
local params = std.extVar("__ksonnet/params").components["tfjob-pvc"];
local k = import "k.libsonnet";

// Volume name shared by the pod-level volume and the container mount.
local dataVolumeName = "data";

// train.py invocation; sample_size is stringified before concatenation.
local trainCommand = [
  "python",
  "/workdir/train.py",
  "--sample_size=" + std.toString(params.sample_size),
  "--input_data=" + params.input_data,
  "--output_model=" + params.output_model,
];

// The single training container, with the claim mounted at /data.
local trainerContainer = {
  image: params.image,
  name: "tensorflow",
  volumeMounts: [
    { name: dataVolumeName, mountPath: "/data" },
  ],
  command: trainCommand,
};

// Pod spec binding the "data-pvc" claim to the container above.
local podSpec = {
  containers: [trainerContainer],
  volumes: [
    {
      name: dataVolumeName,
      persistentVolumeClaim: { claimName: "data-pvc" },
    },
  ],
  restartPolicy: "OnFailure",
};

// One MASTER replica, which is also named as the chief.
local pvcTrainJob = {
  apiVersion: "kubeflow.org/v1alpha1",
  kind: "TFJob",
  metadata: {
    name: "tf-job-issue-summarization-pvc",
    namespace: env.namespace,
  },
  spec: {
    replicaSpecs: [
      {
        replicas: 1,
        template: { spec: podSpec },
        tfReplicaType: "MASTER",
      },
    ],
    terminationPolicy: {
      chief: { replicaIndex: 0, replicaName: "MASTER" },
    },
  },
};

std.prune(k.core.v1.list.new([pvcTrainJob]))

View File

@ -1,7 +0,0 @@
// ksonnet component "tfjob": emits the manifest built by tfjob.libsonnet
// wrapped in a one-element list.
local k = import "k.libsonnet";
local tfjob = import "tfjob.libsonnet";

// Kept from the original component; this script does not read it.
local env = std.extVar("__ksonnet/environments");
local params = std.extVar("__ksonnet/params").components.tfjob;

// The library takes only the component parameters.
local trainingJob = tfjob.parts(params);

// std.prune strips null and empty members from the rendered list.
std.prune(k.core.v1.list.new([trainingJob]))

View File

@ -1,67 +0,0 @@
// Builds the single-replica kubeflow.org/v1alpha1 TFJob that runs train.py
// against data addressed by GCS bucket/path parameters.
{
  // parts returns the TFJob manifest.
  //
  // params fields read: namespace, image, sample_size,
  //   input_data_gcs_bucket, input_data_gcs_path,
  //   output_model_gcs_bucket, output_model_gcs_path.
  parts(params)::
    // Arguments passed after the "python" command; sample_size is
    // stringified before concatenation.
    local trainArgs = [
      "/workdir/train.py",
      "--sample_size=" + std.toString(params.sample_size),
      "--input_data_gcs_bucket=" + params.input_data_gcs_bucket,
      "--input_data_gcs_path=" + params.input_data_gcs_path,
      "--output_model_gcs_bucket=" + params.output_model_gcs_bucket,
      "--output_model_gcs_path=" + params.output_model_gcs_path,
    ];

    // The "gcp-credentials" secret is mounted read-only, and
    // GOOGLE_APPLICATION_CREDENTIALS points at key.json inside the mount.
    local trainerContainer = {
      image: params.image,
      name: "tensorflow",
      volumeMounts: [
        {
          name: "gcp-credentials",
          mountPath: "/secret/gcp-credentials",
          readOnly: true,
        },
      ],
      command: ["python"],
      args: trainArgs,
      env: [
        {
          name: "GOOGLE_APPLICATION_CREDENTIALS",
          value: "/secret/gcp-credentials/key.json",
        },
      ],
    };

    // Pod spec pairing the container with the secret-backed volume.
    local podSpec = {
      containers: [trainerContainer],
      volumes: [
        {
          name: "gcp-credentials",
          secret: { secretName: "gcp-credentials" },
        },
      ],
      restartPolicy: "OnFailure",
    };

    {
      apiVersion: "kubeflow.org/v1alpha1",
      kind: "TFJob",
      metadata: {
        name: "tf-job-issue-summarization",
        namespace: params.namespace,
      },
      spec: {
        // One MASTER replica, which is also named as the chief.
        replicaSpecs: [
          {
            replicas: 1,
            template: { spec: podSpec },
            tfReplicaType: "MASTER",
          },
        ],
        terminationPolicy: {
          chief: { replicaIndex: 0, replicaName: "MASTER" },
        },
      },
    },
}