mirror of https://github.com/kubeflow/examples.git
				
				
				
			Remove v1alpha1 TFJobs from the GH issue summarization example. (#264)
* We should be using v1alpha2 exclusively now.
This commit is contained in:
		
							parent
							
								
									4ea761630d
								
							
						
					
					
						commit
						90044d24c4
					
				| 
						 | 
				
			
			@ -24,10 +24,6 @@
 | 
			
		|||
      namespace: "null",
 | 
			
		||||
      replicas: 2,
 | 
			
		||||
    },
 | 
			
		||||
    tensor2tensor: {
 | 
			
		||||
      cpuImage: "gcr.io/kubeflow-examples/issue-summarization-t2t-trainer-cpu:v20180428-9da5cb7-dirty-4e1f35",
 | 
			
		||||
      namespace: "null",
 | 
			
		||||
    },
 | 
			
		||||
    tensorboard: {
 | 
			
		||||
      image: "tensorflow/tensorflow:1.7.0",
 | 
			
		||||
      // logDir needs to be overwritten based on where the data is
 | 
			
		||||
| 
						 | 
				
			
			@ -35,22 +31,6 @@
 | 
			
		|||
      logDir: "",
 | 
			
		||||
      name: "gh",
 | 
			
		||||
    },
 | 
			
		||||
    tfjob: {
 | 
			
		||||
      image: "gcr.io/kubeflow-dev/tf-job-issue-summarization:v20180425-e79f888",
 | 
			
		||||
      input_data_gcs_bucket: "kubeflow-examples",
 | 
			
		||||
      input_data_gcs_path: "github-issue-summarization-data/github-issues.zip",
 | 
			
		||||
      namespace: "null",
 | 
			
		||||
      output_model_gcs_bucket: "kubeflow-examples",
 | 
			
		||||
      output_model_gcs_path: "github-issue-summarization-data/output_model.h5",
 | 
			
		||||
      sample_size: "100000",
 | 
			
		||||
    },
 | 
			
		||||
    "tfjob-pvc": {
 | 
			
		||||
      image: "gcr.io/kubeflow-dev/tf-job-issue-summarization:v20180425-e79f888",
 | 
			
		||||
      input_data: "/data/github_issues.csv",
 | 
			
		||||
      namespace: "null",
 | 
			
		||||
      output_model: "/data/model.h5",
 | 
			
		||||
      sample_size: "2000000",
 | 
			
		||||
    },
 | 
			
		||||
    ui: {
 | 
			
		||||
      namespace: "null",
 | 
			
		||||
      githubToken: "",
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -1,7 +0,0 @@
 | 
			
		|||
// ksonnet component that emits the tensor2tensor training job.
// Libraries first, then the values injected by ksonnet.
local k = import "k.libsonnet";
local tensor2tensor = import "tensor2tensor.libsonnet";

local env = std.extVar("__ksonnet/environments");
local params = std.extVar("__ksonnet/params").components.tensor2tensor;

// `job` is the only part of the library object that gets deployed.
local job = tensor2tensor.parts(params, env).job;

std.prune(k.core.v1.list.new([job]))
 | 
			
		||||
| 
						 | 
				
			
			@ -1,150 +0,0 @@
 | 
			
		|||
{
  // Builds the tensor2tensor training job.
  //
  // params: component parameters; every field overrides the matching default below.
  // env: environment object; only env.namespace is read.
  //
  // The result carries one hidden field, `job`: a kubeflow.org/v1alpha1 TFJob with
  // MASTER, WORKER and PS replica specs.
  parts(params, env):: {
    // Defaults; `+ params` lets caller-supplied values win.
    local updatedParams = {
      sync: "0",

      dataDir: "gs://kubeflow-examples-data/gh_issue_summarization/data",
      usrDir: "./github",
      problem: "github_issue_summarization_problem",

      model: "transformer_encoder",
      hparams: "transformer_github_issues",
      hparamsSet: "transformer_github_issues",
      outputGCSPath: "gs://kubecon-gh-demo/gh-t2t-out/temp",

      gpuImage: null,
      cpuImage: null,

      trainSteps: 20000,
      evalSteps: 10,

      psGpu: 0,
      workerGpu: 0,

      workers: 3,
      masters: 1,
      ps: 1,

      jobName: "tensor2tensor",
    } + params,

    // Environment variables set on every container of the job.
    local containerEnv = [
      {
        name: "PYTHONPATH",
        value: "/home/jovyan",
      },
    ],

    // Launcher plus the flags shared by every replica type.
    local baseCommand = [
      "/home/jovyan/github/t2t_launcher.sh",
      "--train_steps=" + updatedParams.trainSteps,
      // NOTE(review): --hparams_set is fed from `hparams`; the `hparamsSet` default is
      // never read in this file. Left as is so existing `hparams` overrides keep
      // working -- confirm which parameter is the intended one.
      "--hparams_set=" + updatedParams.hparams,
      "--model=" + updatedParams.model,
      "--problem=" + updatedParams.problem,
      "--t2t_usr_dir=" + updatedParams.usrDir,
      "--data_dir=" + updatedParams.dataDir,
      "--output_dir=" + updatedParams.outputGCSPath,
    ],

    local psCommand = baseCommand + [
      "--schedule=run_std_server",
    ],

    // --worker_replicas counts the masters as well as the workers.
    local totalWorkerReplicas = updatedParams.workers + updatedParams.masters,

    local workerBaseCommand = baseCommand + [
      "--schedule=train",
      "--sync=" + updatedParams.sync,
      "--ps_gpu=" + updatedParams.psGpu,
      "--worker_gpu=" + updatedParams.workerGpu,
      "--worker_replicas=" + totalWorkerReplicas,
      "--ps_replicas=" + updatedParams.ps,
      "--eval_steps=" + updatedParams.evalSteps,
    ],

    local workerCommand = workerBaseCommand + [
      "--worker_job=/job:worker",
    ],

    local masterCommand = workerBaseCommand + [
      "--worker_job=/job:master",
    ],

    // MASTER and WORKER containers use the GPU image and limits only when workerGpu > 0.
    local useGpu = updatedParams.workerGpu > 0,

    // Container shared by the MASTER and WORKER replicas; only the command differs.
    local trainerContainer(command) = {
      image: if useGpu then updatedParams.gpuImage else updatedParams.cpuImage,
      name: "tensorflow",
      command: command,
      env: containerEnv,
      // A null computed field name drops the field, so CPU runs get no resources stanza.
      [if useGpu then "resources"]: {
        limits: {
          "nvidia.com/gpu": updatedParams.workerGpu,
        },
      },
    },

    // Wraps a single container into a v1alpha1 replica spec of the given type.
    local replicaSpec(replicaType, replicas, container) = {
      replicas: replicas,
      template: {
        spec: {
          containers: [container],
          restartPolicy: "OnFailure",
        },
      },
      tfReplicaType: replicaType,
    },

    job:: {
      apiVersion: "kubeflow.org/v1alpha1",
      kind: "TFJob",
      metadata: {
        name: updatedParams.jobName,
        namespace: env.namespace,
      },
      spec: {
        replicaSpecs: [
          // The MASTER replica count is fixed at 1; `masters` only feeds --worker_replicas.
          replicaSpec("MASTER", 1, trainerContainer(masterCommand)),
          replicaSpec("WORKER", updatedParams.workers, trainerContainer(workerCommand)),
          // Parameter servers always use the CPU image and request no GPU.
          replicaSpec("PS", updatedParams.ps, {
            image: updatedParams.cpuImage,
            name: "tensorflow",
            command: psCommand,
            env: containerEnv,
          }),
        ],
        terminationPolicy: {
          chief: {
            replicaIndex: 0,
            replicaName: "MASTER",
          },
        },
      },
    },  // job
  },  // parts
}
 | 
			
		||||
| 
						 | 
				
			
			@ -1,61 +0,0 @@
 | 
			
		|||
// Train the model reading & writing the data from a PVC.
//
// ksonnet component: takes its settings from the "tfjob-pvc" entry of the
// component params and emits a list holding a single v1alpha1 TFJob.
local k = import "k.libsonnet";
local env = std.extVar("__ksonnet/environments");
local params = std.extVar("__ksonnet/params").components["tfjob-pvc"];

// Name shared by the pod volume and the container's volume mount.
local dataVolumeName = "data";

// Training command; sample_size goes through std.toString before concatenation.
local trainCommand = [
  "python",
  "/workdir/train.py",
  "--sample_size=" + std.toString(params.sample_size),
  "--input_data=" + params.input_data,
  "--output_model=" + params.output_model,
];

// The single training container, with the data volume mounted at /data.
local trainerContainer = {
  image: params.image,
  name: "tensorflow",
  volumeMounts: [
    {
      name: dataVolumeName,
      mountPath: "/data",
    },
  ],
  command: trainCommand,
};

// Pod volume backed by the "data-pvc" claim.
local dataVolume = {
  name: dataVolumeName,
  persistentVolumeClaim: {
    claimName: "data-pvc",
  },
};

// The job consists of one MASTER replica only.
local masterReplica = {
  replicas: 1,
  template: {
    spec: {
      containers: [trainerContainer],
      volumes: [dataVolume],
      restartPolicy: "OnFailure",
    },
  },
  tfReplicaType: "MASTER",
};

local tfjob = {
  apiVersion: "kubeflow.org/v1alpha1",
  kind: "TFJob",
  metadata: {
    name: "tf-job-issue-summarization-pvc",
    namespace: env.namespace,
  },
  spec: {
    replicaSpecs: [masterReplica],
    terminationPolicy: {
      chief: {
        replicaIndex: 0,
        replicaName: "MASTER",
      },
    },
  },
};

std.prune(k.core.v1.list.new([tfjob]))
 | 
			
		||||
| 
						 | 
				
			
			@ -1,7 +0,0 @@
 | 
			
		|||
// ksonnet component that emits the GCS-backed issue summarization TFJob.
// Libraries first, then the values injected by ksonnet.
local k = import "k.libsonnet";
local tfjob = import "tfjob.libsonnet";

// `env` is bound here but not passed on: parts() takes only the component params.
local env = std.extVar("__ksonnet/environments");
local params = std.extVar("__ksonnet/params").components.tfjob;

local job = tfjob.parts(params);

std.prune(k.core.v1.list.new([job]))
 | 
			
		||||
| 
						 | 
				
			
			@ -1,67 +0,0 @@
 | 
			
		|||
{
  // Returns a kubeflow.org/v1alpha1 TFJob with a single MASTER replica that runs
  // /workdir/train.py against data in GCS.
  //
  // params fields read: namespace, image, sample_size, input_data_gcs_bucket,
  // input_data_gcs_path, output_model_gcs_bucket, output_model_gcs_path.
  parts(params)::
    // Name shared by the secret, the pod volume and the volume mount.
    local credentials = "gcp-credentials";

    // Arguments passed to python; sample_size goes through std.toString first.
    local trainArgs = [
      "/workdir/train.py",
      "--sample_size=" + std.toString(params.sample_size),
      "--input_data_gcs_bucket=" + params.input_data_gcs_bucket,
      "--input_data_gcs_path=" + params.input_data_gcs_path,
      "--output_model_gcs_bucket=" + params.output_model_gcs_bucket,
      "--output_model_gcs_path=" + params.output_model_gcs_path,
    ];

    // Training container; the credentials secret is mounted read-only and
    // GOOGLE_APPLICATION_CREDENTIALS points at key.json inside that mount.
    local trainerContainer = {
      image: params.image,
      name: "tensorflow",
      volumeMounts: [
        {
          name: credentials,
          mountPath: "/secret/gcp-credentials",
          readOnly: true,
        },
      ],
      command: ["python"],
      args: trainArgs,
      env: [
        {
          name: "GOOGLE_APPLICATION_CREDENTIALS",
          value: "/secret/gcp-credentials/key.json",
        },
      ],
    };

    // Pod volume sourced from the secret of the same name.
    local credentialsVolume = {
      name: credentials,
      secret: {
        secretName: credentials,
      },
    };

    local masterReplica = {
      replicas: 1,
      template: {
        spec: {
          containers: [trainerContainer],
          volumes: [credentialsVolume],
          restartPolicy: "OnFailure",
        },
      },
      tfReplicaType: "MASTER",
    };

    {
      apiVersion: "kubeflow.org/v1alpha1",
      kind: "TFJob",
      metadata: {
        name: "tf-job-issue-summarization",
        namespace: params.namespace,
      },
      spec: {
        replicaSpecs: [masterReplica],
        terminationPolicy: {
          chief: {
            replicaIndex: 0,
            replicaName: "MASTER",
          },
        },
      },
    },
}
 | 
			
		||||
		Loading…
	
		Reference in New Issue