Remove v1alpha1 TFJobs from the GH issue summarization example. (#264)

* We should be using v1alpha2 exclusively now.
2018-10-15 09:52:01 -07:00 · 2018-10-15 09:52:01 -07:00 · 90044d24c4
parent 4ea761630d
commit 90044d24c4
6 changed files with 0 additions and 312 deletions
--- a/github_issue_summarization/ks-kubeflow/components/params.libsonnet
+++ b/github_issue_summarization/ks-kubeflow/components/params.libsonnet
@ -24,10 +24,6 @@
      namespace: "null",
      replicas: 2,
    },
    tensor2tensor: {
      cpuImage: "gcr.io/kubeflow-examples/issue-summarization-t2t-trainer-cpu:v20180428-9da5cb7-dirty-4e1f35",
      namespace: "null",
    },
    tensorboard: {
      image: "tensorflow/tensorflow:1.7.0",
      // logDir needs to be overwritten based on where the data is
@ -35,22 +31,6 @@
      logDir: "",
      name: "gh",
    },
    tfjob: {
      image: "gcr.io/kubeflow-dev/tf-job-issue-summarization:v20180425-e79f888",
      input_data_gcs_bucket: "kubeflow-examples",
      input_data_gcs_path: "github-issue-summarization-data/github-issues.zip",
      namespace: "null",
      output_model_gcs_bucket: "kubeflow-examples",
      output_model_gcs_path: "github-issue-summarization-data/output_model.h5",
      sample_size: "100000",
    },
    "tfjob-pvc": {
      image: "gcr.io/kubeflow-dev/tf-job-issue-summarization:v20180425-e79f888",
      input_data: "/data/github_issues.csv",
      namespace: "null",
      output_model: "/data/model.h5",
      sample_size: "2000000",
    },
    ui: {
      namespace: "null",
      githubToken: "",
--- a/github_issue_summarization/ks-kubeflow/components/tensor2tensor.jsonnet
+++ b/github_issue_summarization/ks-kubeflow/components/tensor2tensor.jsonnet
@ -1,7 +0,0 @@
 // ksonnet entry point for the tensor2tensor component.
 // Environment settings supplied through the "__ksonnet/environments" external variable.
 local env = std.extVar("__ksonnet/environments");
 // Parameters of this component, read from the "__ksonnet/params" external variable.
 local params = std.extVar("__ksonnet/params").components.tensor2tensor;
 local k = import "k.libsonnet";
 local tensor2tensor = import "tensor2tensor.libsonnet";
 // Wrap the `job` part in a list built by k.core.v1.list.new, then drop
 // empty/null members with std.prune.
 std.prune(k.core.v1.list.new([tensor2tensor.parts(params, env).job]))
--- a/github_issue_summarization/ks-kubeflow/components/tensor2tensor.libsonnet
+++ b/github_issue_summarization/ks-kubeflow/components/tensor2tensor.libsonnet
@ -1,150 +0,0 @@
 {
  // Builds the parts of the tensor2tensor training component.
  //
  // params: component parameters. Any key given here overrides the default of
  //   the same name declared in `updatedParams` below.
  // env: environment object; only `env.namespace` is read.
  //
  // The result exposes one hidden field, `job`: a kubeflow.org/v1alpha1 TFJob
  // with MASTER, WORKER and PS replica specs.
  parts(params, env):: {
    // Defaults, overridden key-by-key by the caller's params.
    local updatedParams = {
      sync: "0",
      dataDir: "gs://kubeflow-examples-data/gh_issue_summarization/data",
      usrDir: "./github",
      problem: "github_issue_summarization_problem",
      model: "transformer_encoder",
      hparams: "transformer_github_issues",
      // NOTE(review): hparamsSet is never read in this file; --hparams_set is
      // built from `hparams`. Confirm which key callers are meant to override.
      hparamsSet: "transformer_github_issues",
      outputGCSPath: "gs://kubecon-gh-demo/gh-t2t-out/temp",
      gpuImage: null,
      cpuImage: null,
      trainSteps: 20000,
      evalSteps: 10,
      psGpu: 0,
      workerGpu: 0,
      workers: 3,
      masters: 1,
      ps: 1,
      jobName: "tensor2tensor",
    } + params,

    // Environment variables set on every container of the job.
    local containerEnv = [
      {
        name: "PYTHONPATH",
        value: "/home/jovyan",
      },
    ],

    // Launcher invocation shared by all replica types.
    local baseCommand = [
      "/home/jovyan/github/t2t_launcher.sh",
      "--train_steps=" + updatedParams.trainSteps,
      "--hparams_set=" + updatedParams.hparams,
      "--model=" + updatedParams.model,
      "--problem=" + updatedParams.problem,
      "--t2t_usr_dir=" + updatedParams.usrDir,
      "--data_dir=" + updatedParams.dataDir,
      "--output_dir=" + updatedParams.outputGCSPath,
    ],
    local psCommand = baseCommand + [
      "--schedule=run_std_server",
    ],

    // --worker_replicas counts MASTER and WORKER replicas together.
    local totalWorkerReplicas = updatedParams.workers + updatedParams.masters,
    local workerBaseCommand = baseCommand + [
      "--schedule=train",
      "--sync=" + updatedParams.sync,
      "--ps_gpu=" + updatedParams.psGpu,
      "--worker_gpu=" + updatedParams.workerGpu,
      "--worker_replicas=" + totalWorkerReplicas,
      "--ps_replicas=" + updatedParams.ps,
      "--eval_steps=" + updatedParams.evalSteps,
    ],
    local workerCommand = workerBaseCommand + [
      "--worker_job=/job:worker",
    ],
    local masterCommand = workerBaseCommand + [
      "--worker_job=/job:master",
    ],

    // MASTER and WORKER use the GPU image only when GPUs are requested.
    local trainerImage =
      if updatedParams.workerGpu > 0 then updatedParams.gpuImage else updatedParams.cpuImage,

    // Container shared by MASTER and WORKER replicas; only the command differs.
    // The `resources` field is emitted only when workerGpu > 0 (a null computed
    // field name omits the field).
    local trainerContainer(command) = {
      image: trainerImage,
      name: "tensorflow",
      command: command,
      env: containerEnv,
      [if updatedParams.workerGpu > 0 then "resources"]: {
        limits: {
          "nvidia.com/gpu": updatedParams.workerGpu,
        },
      },
    },

    // Container for PS replicas: always the CPU image, never GPU resources.
    local psContainer = {
      image: updatedParams.cpuImage,
      name: "tensorflow",
      command: psCommand,
      env: containerEnv,
    },

    // One entry of spec.replicaSpecs running a single container.
    local replicaSpec(replicaType, count, container) = {
      replicas: count,
      template: {
        spec: {
          containers: [container],
          restartPolicy: "OnFailure",
        },
      },
      tfReplicaType: replicaType,
    },

    job:: {
      apiVersion: "kubeflow.org/v1alpha1",
      kind: "TFJob",
      metadata: {
        name: updatedParams.jobName,
        namespace: env.namespace,
      },
      spec: {
        replicaSpecs: [
          // NOTE(review): the MASTER count is fixed at 1 even though
          // updatedParams.masters feeds --worker_replicas; confirm that
          // overriding `masters` is not expected to change this.
          replicaSpec("MASTER", 1, trainerContainer(masterCommand)),
          replicaSpec("WORKER", updatedParams.workers, trainerContainer(workerCommand)),
          replicaSpec("PS", updatedParams.ps, psContainer),
        ],
        terminationPolicy: {
          chief: {
            replicaIndex: 0,
            replicaName: "MASTER",
          },
        },
      },
    },  // job
  },  //parts
 }
--- a/github_issue_summarization/ks-kubeflow/components/tfjob-pvc.jsonnet
+++ b/github_issue_summarization/ks-kubeflow/components/tfjob-pvc.jsonnet
@ -1,61 +0,0 @@
 // Trains the model, reading its input from and writing its output to a
 // PersistentVolumeClaim mounted at /data.
 local env = std.extVar("__ksonnet/environments");
 local params = std.extVar("__ksonnet/params").components["tfjob-pvc"];
 local k = import "k.libsonnet";

 // Name that ties the pod volume to the container's volume mount.
 local volumeName = "data";

 // The single training container.
 local trainer = {
   image: params.image,
   name: "tensorflow",
   volumeMounts: [
     { name: volumeName, mountPath: "/data" },
   ],
   command: [
     "python",
     "/workdir/train.py",
     "--sample_size=" + std.toString(params.sample_size),
     "--input_data=" + params.input_data,
     "--output_model=" + params.output_model,
   ],
 };

 // Pod-level volume backed by the "data-pvc" claim.
 local dataVolume = {
   name: volumeName,
   persistentVolumeClaim: { claimName: "data-pvc" },
 };

 // The only replica spec of the job.
 local master = {
   replicas: 1,
   tfReplicaType: "MASTER",
   template: {
     spec: {
       containers: [trainer],
       volumes: [dataVolume],
       restartPolicy: "OnFailure",
     },
   },
 };

 local tfjob = {
   apiVersion: "kubeflow.org/v1alpha1",
   kind: "TFJob",
   metadata: {
     name: "tf-job-issue-summarization-pvc",
     namespace: env.namespace,
   },
   spec: {
     replicaSpecs: [master],
     terminationPolicy: {
       chief: { replicaIndex: 0, replicaName: "MASTER" },
     },
   },
 };

 std.prune(k.core.v1.list.new([tfjob]))
--- a/github_issue_summarization/ks-kubeflow/components/tfjob.jsonnet
+++ b/github_issue_summarization/ks-kubeflow/components/tfjob.jsonnet
@ -1,7 +0,0 @@
 // ksonnet entry point for the tfjob component.
 local env = std.extVar("__ksonnet/environments");
 local params = std.extVar("__ksonnet/params").components.tfjob;
 local k = import "k.libsonnet";
 local tfjob = import "tfjob.libsonnet";

 // params.namespace defaults to the string "null" in params.libsonnet, and
 // tfjob.libsonnet copies it verbatim into metadata.namespace. Treat "null" as
 // "not set" and fall back to the environment's namespace, which is what
 // tfjob-pvc.jsonnet uses. An explicitly configured namespace is kept as-is.
 local resolvedParams = params {
   namespace: if params.namespace == "null" then env.namespace else params.namespace,
 };

 std.prune(k.core.v1.list.new([tfjob.parts(resolvedParams)]))
--- a/github_issue_summarization/ks-kubeflow/components/tfjob.libsonnet
+++ b/github_issue_summarization/ks-kubeflow/components/tfjob.libsonnet
@ -1,67 +0,0 @@
 {
  // Returns a kubeflow.org/v1alpha1 TFJob named "tf-job-issue-summarization"
  // with one MASTER replica that runs /workdir/train.py.
  //
  // params fields read: namespace, image, sample_size, input_data_gcs_bucket,
  // input_data_gcs_path, output_model_gcs_bucket, output_model_gcs_path.
  parts(params)::
    // The credentials secret, its volume, and its mount all share one name.
    local credentials = "gcp-credentials";
    local credentialsDir = "/secret/" + credentials;

    // Arguments handed to the training script.
    local trainArgs = [
      "/workdir/train.py",
      "--sample_size=" + std.toString(params.sample_size),
      "--input_data_gcs_bucket=" + params.input_data_gcs_bucket,
      "--input_data_gcs_path=" + params.input_data_gcs_path,
      "--output_model_gcs_bucket=" + params.output_model_gcs_bucket,
      "--output_model_gcs_path=" + params.output_model_gcs_path,
    ];

    // Training container; the secret is mounted read-only and its key file is
    // exposed through GOOGLE_APPLICATION_CREDENTIALS.
    local trainer = {
      image: params.image,
      name: "tensorflow",
      volumeMounts: [
        { name: credentials, mountPath: credentialsDir, readOnly: true },
      ],
      command: ["python"],
      args: trainArgs,
      env: [
        { name: "GOOGLE_APPLICATION_CREDENTIALS", value: credentialsDir + "/key.json" },
      ],
    };

    // The only replica spec of the job.
    local master = {
      replicas: 1,
      tfReplicaType: "MASTER",
      template: {
        spec: {
          containers: [trainer],
          volumes: [
            { name: credentials, secret: { secretName: credentials } },
          ],
          restartPolicy: "OnFailure",
        },
      },
    };

    {
      apiVersion: "kubeflow.org/v1alpha1",
      kind: "TFJob",
      metadata: {
        name: "tf-job-issue-summarization",
        namespace: params.namespace,
      },
      spec: {
        replicaSpecs: [master],
        terminationPolicy: {
          chief: { replicaIndex: 0, replicaName: "MASTER" },
        },
      },
    },
 }