Add components (#402)

Replace files that were mistakenly removed in #376
2018-12-06 10:06:42 +11:00 · 2018-12-06 10:06:42 +11:00 · 5e395c1a88
parent fa1311833c
commit 5e395c1a88
6 changed files with 619 additions and 0 deletions
--- a/demos/yelp_demo/ks_app/components/serving.jsonnet
+++ b/demos/yelp_demo/ks_app/components/serving.jsonnet
@ -0,0 +1,20 @@
+local env = std.extVar("__ksonnet/environments");
+local params = std.extVar("__ksonnet/params").components.serving;
+local k = import "k.libsonnet";
+
+// Merge the environment with the component params. `env` is the left operand,
+// so a key set in `params` (e.g. namespace) takes precedence over the environment.
+local mergedParams = env + params;
+
+local tfServingBase = import "kubeflow/tf-serving/tf-serving.libsonnet";
+
+// Specialise the tf-serving library with the merged parameters.
+local tfServing = tfServingBase {
+  params+: mergedParams {
+    // ksonnet appears to require name be a parameter of the prototype,
+    // which is why it is set explicitly here.
+    name: params.name,
+  },
+};
+
+std.prune(k.core.v1.list.new(tfServing.components))
--- a/demos/yelp_demo/ks_app/components/t2tcpu.jsonnet
+++ b/demos/yelp_demo/ks_app/components/t2tcpu.jsonnet
@ -0,0 +1,198 @@
+local env = std.extVar("__ksonnet/environments");
+local params = std.extVar("__ksonnet/params").components["t2tcpu"];
+
+local k = import "k.libsonnet";
+
+// Namespace for the TFJob, read from the ksonnet environment.
+local namespace = env.namespace;
+
+// Built-in defaults for the CPU training job; see updatedParams below for overrides.
+local defaults = {
+  cloud: "gke",  // any other value enables the GCP credential secrets further down
+  sync: "0",
+
+  dataDir: "gs://kubeflow-demo-base/featurization/yelp-data",
+  usrDir: "./yelp_sentiment",
+  problem: "yelp_sentiment",
+
+  model: "transformer_encoder",
+  hparams: "transformer_yelp_sentiment",
+  hparamsSet: "transformer_yelp_sentiment",
+
+  outputGCSPath: "gs://kubeflow-demo-base/kubeflow-demo-base-demo/CPU/training/yelp-model",
+
+  gpuImage: "gcr.io/kubeflow-demo-base/kubeflow-yelp-demo-gpu:latest",
+  cpuImage: "gcr.io/kubeflow-demo-base/kubeflow-yelp-demo-cpu:latest",
+
+  trainSteps: 1000,
+  evalSteps: 10,
+
+  psGpu: 0,
+  workerGpu: 0,  // > 0 selects gpuImage and a GPU limit for Master/Worker
+
+  workers: 3,
+  masters: 1,
+  ps: 1,
+  jobName: "t2tcpu",
+};
+local updatedParams = defaults + params;  // keys present in `params` win
+
+local baseCommand = [  // Launcher invocation and flags shared by the master, worker and ps commands.
+  "bash", "/home/jovyan/yelp_sentiment/worker_launcher.sh",
+  "--train_steps=" + updatedParams.trainSteps,
+  // Honor an explicitly supplied `hparamsSet` (the key t2ttpu reads); else keep using `hparams`.
+  "--hparams_set=" + (if std.objectHas(params, "hparamsSet") then params.hparamsSet else updatedParams.hparams),
+  "--model=" + updatedParams.model,
+  "--problem=" + updatedParams.problem,
+  "--t2t_usr_dir=" + updatedParams.usrDir,
+  "--data_dir=" + updatedParams.dataDir,
+  "--output_dir=" + updatedParams.outputGCSPath,
+];
+
+// Ps command: the shared flags plus the run_std_server schedule.
+local psCommand = baseCommand + ["--schedule=run_std_server"];
+
+// Replica count reported through --worker_replicas: the masters are counted
+// together with the workers.
+local totalWorkerReplicas = updatedParams.workers + updatedParams.masters;
+
+// Training flags common to the Master and Worker commands.
+local workerBaseCommand = baseCommand + [
+  "--schedule=train",
+  "--sync=" + updatedParams.sync,
+  "--ps_gpu=" + updatedParams.psGpu,
+  "--worker_gpu=" + updatedParams.workerGpu,
+  "--worker_replicas=" + totalWorkerReplicas,
+  "--ps_replicas=" + updatedParams.ps,
+  "--eval_steps=" + updatedParams.evalSteps,
+];
+
+// Worker command: the training flags plus the worker job name.
+local workerCommand = workerBaseCommand + ["--worker_job=/job:worker"];
+
+// Master command: same training flags, tagged with the master job name.
+local masterCommand = workerBaseCommand + ["--worker_job=/job:master"];
+
+// GPU limit attached to the Master/Worker containers when workerGpu > 0.
+local gpuResources = {
+  limits: {
+    "nvidia.com/gpu": updatedParams.workerGpu,
+  },
+};
+
+// Target cloud as a string. Every value other than "gke" switches on the
+// credential wiring defined by the nonGke* values below.
+local cloud = std.toString(updatedParams.cloud);
+
+// Environment variables given to every container.
+local baseEnv = [
+  { name: "PYTHONPATH", value: "/home/jovyan" },
+];
+
+// Outside GKE the containers also receive the location of the mounted
+// credentials key file.
+local nonGkeEnv = baseEnv + [
+  { name: "GOOGLE_APPLICATION_CREDENTIALS", value: "/secret/gcp-credentials/key.json" },
+];
+
+// Pod volume backed by the "gcp-credentials" secret.
+local nonGkeVolumes = [
+  {
+    name: "gcp-credentials",
+    secret: { secretName: "gcp-credentials" },
+  },
+];
+
+// Registry pull secret added to the pod spec outside GKE.
+local nonGkeImagePullSecrets = [
+  { name: "gcp-registry-credentials" },
+];
+
+// Mounts the credentials volume at the directory that
+// GOOGLE_APPLICATION_CREDENTIALS points into.
+local nonGkeVolumeMounts = [
+  {
+    name: "gcp-credentials",
+    mountPath: "/secret/gcp-credentials",
+  },
+];
+
+// TFJob manifest: one Master, plus Worker and Ps replica sets sized from updatedParams.
+local tfjob = {
+  apiVersion: "kubeflow.org/v1alpha2",
+  kind: "TFJob",
+  metadata: {
+    name: updatedParams.jobName,
+    namespace: namespace,
+  },
+  spec: {
+    tfReplicaSpecs: {
+      Master: {
+        replicas: 1,
+        template: {
+          spec: {
+            containers: [
+              {
+                name: "tensorflow",
+                image: if updatedParams.workerGpu > 0 then updatedParams.gpuImage else updatedParams.cpuImage,
+                command: masterCommand,
+                env: if cloud == "gke" then baseEnv else nonGkeEnv,
+                [if updatedParams.workerGpu > 0 then "resources"]: gpuResources,
+                [if cloud != "gke" then "volumeMounts"]: nonGkeVolumeMounts,
+              },
+            ],
+            restartPolicy: "OnFailure",
+            [if cloud != "gke" then "imagePullSecrets"]: nonGkeImagePullSecrets,
+            [if cloud != "gke" then "volumes"]: nonGkeVolumes,
+          },
+        },
+      },  // Master
+      Worker: {
+        replicas: updatedParams.workers,
+        template: {
+          spec: {
+            containers: [
+              {
+                name: "tensorflow",
+                image: if updatedParams.workerGpu > 0 then updatedParams.gpuImage else updatedParams.cpuImage,
+                command: workerCommand,
+                env: if cloud == "gke" then baseEnv else nonGkeEnv,
+                [if updatedParams.workerGpu > 0 then "resources"]: gpuResources,
+                [if cloud != "gke" then "volumeMounts"]: nonGkeVolumeMounts,
+              },
+            ],
+            restartPolicy: "OnFailure",
+            [if cloud != "gke" then "imagePullSecrets"]: nonGkeImagePullSecrets,
+            [if cloud != "gke" then "volumes"]: nonGkeVolumes,
+          },
+        },
+      },  // Worker
+      Ps: {
+        replicas: updatedParams.ps,
+        template: {
+          spec: {
+            containers: [
+              {
+                name: "tensorflow",
+                image: updatedParams.cpuImage,
+                command: psCommand,
+                env: if cloud == "gke" then baseEnv else nonGkeEnv,
+                [if cloud != "gke" then "volumeMounts"]: nonGkeVolumeMounts,
+              },
+            ],
+            restartPolicy: "OnFailure",
+            [if cloud != "gke" then "imagePullSecrets"]: nonGkeImagePullSecrets,
+            [if cloud != "gke" then "volumes"]: nonGkeVolumes,
+          },
+        },
+      },  // Ps
+    },  // tfReplicaSpecs
+  },  // spec
+};  // tfjob
+
+k.core.v1.list.new([
+  tfjob,
+])
+
+
+
--- a/demos/yelp_demo/ks_app/components/t2tgpu.jsonnet
+++ b/demos/yelp_demo/ks_app/components/t2tgpu.jsonnet
@ -0,0 +1,197 @@
+local env = std.extVar("__ksonnet/environments");
+local params = std.extVar("__ksonnet/params").components["t2tgpu"];
+
+local k = import "k.libsonnet";
+
+// Namespace for the TFJob, read from the ksonnet environment.
+local namespace = env.namespace;
+
+local defaults = {  // built-in defaults for the GPU training job
+  cloud: "gke",  // any other value enables the GCP credential secrets further down
+  sync: "0",
+
+  dataDir: "gs://kubeflow-demo-base/featurization/yelp-data",
+  usrDir: "./yelp_sentiment",
+  problem: "yelp_sentiment",
+
+  model: "transformer_encoder",
+  hparams: "transformer_yelp_sentiment",
+  hparamsSet: "transformer_yelp_sentiment",
+
+  outputGCSPath: "gs://kubeflow-demo-base/kubeflow-demo-base-demo/GPU/training/yelp-model",
+
+  gpuImage: "gcr.io/kubeflow-demo-base/kubeflow-yelp-demo-gpu:latest",
+  cpuImage: "gcr.io/kubeflow-demo-base/kubeflow-yelp-demo-cpu:latest",
+
+  trainSteps: 1000,
+  evalSteps: 10,
+
+  psGpu: 0,
+  workerGpu: 1,  // > 0 selects gpuImage and a GPU limit for Master/Worker
+
+  workers: 3,
+  masters: 1,
+  ps: 1,
+  jobName: "t2tgpu",
+};
+local updatedParams = defaults + params;  // keys present in `params` win
+
+local baseCommand = [  // Launcher invocation and flags shared by the master, worker and ps commands.
+  "bash", "/home/jovyan/yelp_sentiment/worker_launcher.sh",
+  "--train_steps=" + updatedParams.trainSteps,
+  // Honor an explicitly supplied `hparamsSet` (the key t2ttpu reads); else keep using `hparams`.
+  "--hparams_set=" + (if std.objectHas(params, "hparamsSet") then params.hparamsSet else updatedParams.hparams),
+  "--model=" + updatedParams.model,
+  "--problem=" + updatedParams.problem,
+  "--t2t_usr_dir=" + updatedParams.usrDir,
+  "--data_dir=" + updatedParams.dataDir,
+  "--output_dir=" + updatedParams.outputGCSPath,
+];
+
+// Ps command: the shared flags plus the run_std_server schedule.
+local psCommand = baseCommand + ["--schedule=run_std_server"];
+
+// Replica count reported through --worker_replicas: the masters are counted
+// together with the workers.
+local totalWorkerReplicas = updatedParams.workers + updatedParams.masters;
+
+// Training flags common to the Master and Worker commands.
+local workerBaseCommand = baseCommand + [
+  "--schedule=train",
+  "--sync=" + updatedParams.sync,
+  "--ps_gpu=" + updatedParams.psGpu,
+  "--worker_gpu=" + updatedParams.workerGpu,
+  "--worker_replicas=" + totalWorkerReplicas,
+  "--ps_replicas=" + updatedParams.ps,
+  "--eval_steps=" + updatedParams.evalSteps,
+];
+
+// Worker command: the training flags plus the worker job name.
+local workerCommand = workerBaseCommand + ["--worker_job=/job:worker"];
+
+// Master command: same training flags, tagged with the master job name.
+local masterCommand = workerBaseCommand + ["--worker_job=/job:master"];
+
+// GPU limit attached to the Master/Worker containers when workerGpu > 0.
+local gpuResources = {
+  limits: {
+    "nvidia.com/gpu": updatedParams.workerGpu,
+  },
+};
+
+// Target cloud as a string. Every value other than "gke" switches on the
+// credential wiring defined by the nonGke* values below.
+local cloud = std.toString(updatedParams.cloud);
+
+// Environment variables given to every container.
+local baseEnv = [
+  { name: "PYTHONPATH", value: "/home/jovyan" },
+];
+
+// Outside GKE the containers also receive the location of the mounted
+// credentials key file.
+local nonGkeEnv = baseEnv + [
+  { name: "GOOGLE_APPLICATION_CREDENTIALS", value: "/secret/gcp-credentials/key.json" },
+];
+
+// Pod volume backed by the "gcp-credentials" secret.
+local nonGkeVolumes = [
+  {
+    name: "gcp-credentials",
+    secret: { secretName: "gcp-credentials" },
+  },
+];
+
+// Registry pull secret added to the pod spec outside GKE.
+local nonGkeImagePullSecrets = [
+  { name: "gcp-registry-credentials" },
+];
+
+// Mounts the credentials volume at the directory that
+// GOOGLE_APPLICATION_CREDENTIALS points into.
+local nonGkeVolumeMounts = [
+  {
+    name: "gcp-credentials",
+    mountPath: "/secret/gcp-credentials",
+  },
+];
+
+// TFJob manifest: one Master, plus Worker and Ps replica sets sized from updatedParams.
+local tfjob = {
+  apiVersion: "kubeflow.org/v1alpha2",
+  kind: "TFJob",
+  metadata: {
+    name: updatedParams.jobName,
+    namespace: namespace,
+  },
+  spec: {
+    tfReplicaSpecs: {
+      Master: {
+        replicas: 1,
+        template: {
+          spec: {
+            containers: [
+              {
+                name: "tensorflow",
+                image: if updatedParams.workerGpu > 0 then updatedParams.gpuImage else updatedParams.cpuImage,
+                command: masterCommand,
+                env: if cloud == "gke" then baseEnv else nonGkeEnv,
+                [if updatedParams.workerGpu > 0 then "resources"]: gpuResources,
+                [if cloud != "gke" then "volumeMounts"]: nonGkeVolumeMounts,
+              },
+            ],
+            restartPolicy: "OnFailure",
+            [if cloud != "gke" then "imagePullSecrets"]: nonGkeImagePullSecrets,
+            [if cloud != "gke" then "volumes"]: nonGkeVolumes,
+          },
+        },
+      },  // Master
+      Worker: {
+        replicas: updatedParams.workers,
+        template: {
+          spec: {
+            containers: [
+              {
+                name: "tensorflow",
+                image: if updatedParams.workerGpu > 0 then updatedParams.gpuImage else updatedParams.cpuImage,
+                command: workerCommand,
+                env: if cloud == "gke" then baseEnv else nonGkeEnv,
+                [if updatedParams.workerGpu > 0 then "resources"]: gpuResources,
+                [if cloud != "gke" then "volumeMounts"]: nonGkeVolumeMounts,
+              },
+            ],
+            restartPolicy: "OnFailure",
+            [if cloud != "gke" then "imagePullSecrets"]: nonGkeImagePullSecrets,
+            [if cloud != "gke" then "volumes"]: nonGkeVolumes,
+          },
+        },
+      },  // Worker
+      Ps: {
+        replicas: updatedParams.ps,
+        template: {
+          spec: {
+            containers: [
+              {
+                name: "tensorflow",
+                image: updatedParams.cpuImage,
+                command: psCommand,
+                env: if cloud == "gke" then baseEnv else nonGkeEnv,
+                [if cloud != "gke" then "volumeMounts"]: nonGkeVolumeMounts,
+              },
+            ],
+            restartPolicy: "OnFailure",
+            [if cloud != "gke" then "imagePullSecrets"]: nonGkeImagePullSecrets,
+            [if cloud != "gke" then "volumes"]: nonGkeVolumes,
+          },
+        },
+      },  // Ps
+    },  // tfReplicaSpecs
+  },  // spec
+};  // tfjob
+
+k.core.v1.list.new([
+  tfjob,
+])
+
+
+
--- a/demos/yelp_demo/ks_app/components/t2ttpu.jsonnet
+++ b/demos/yelp_demo/ks_app/components/t2ttpu.jsonnet
@ -0,0 +1,95 @@
+local env = std.extVar("__ksonnet/environments");
+local params = std.extVar("__ksonnet/params").components["t2ttpu"];
+
+local k = import "k.libsonnet";
+
+// Namespace for the TFJob, read from the ksonnet environment.
+local namespace = env.namespace;
+
+// Defaults for the TPU training job. Keys supplied through the ksonnet
+// `params` override the values below.
+local updatedParams = {
+  cloud: "gke",  // not read by this component
+
+  dataDir: "gs://kubeflow-demo-base/featurization/yelp-data",
+  usrDir: "./yelp_sentiment",
+  problem: "yelp_sentiment",
+
+  model: "transformer_encoder",
+  hparams: "transformer_yelp_sentiment",  // not read by this component
+  hparamsSet: "transformer_yelp_sentiment",  // passed as --hparams_set
+
+  outputGCSPath: "gs://kubeflow-demo-base/training/yelp-model-TPU",
+
+  cpuImage: "gcr.io/kubeflow-demo-base/kubeflow-yelp-demo-cpu:latest",
+  gpuImage: "gcr.io/kubeflow-demo-base/kubeflow-yelp-demo-gpu:latest",  // not read by this component
+
+  trainSteps: 1000,
+  evalSteps: 10,
+
+  tpus: 8,  // value of the cloud-tpus.google.com/v2 limit on the Master container
+
+  jobName: "t2ttpu",
+
+  tpuEndpoint: "$(KUBE_GOOGLE_CLOUD_TPU_ENDPOINTS)",  // passed as --master
+} + params;
+
+// TFJob with a single Master replica that runs t2t-trainer with --use_tpu.
+// Every tunable value is read from updatedParams.
+local tfjob = {
+  apiVersion: "kubeflow.org/v1alpha2",
+  kind: "TFJob",
+  metadata: {
+    name: updatedParams.jobName,
+    namespace: namespace,
+  },
+  spec: {
+    tfReplicaSpecs: {
+      Master: {
+        replicas: 1,
+        template: {
+          metadata: {
+            // Cloud TPU TensorFlow-version annotation on the pod template.
+            annotations: { "tf-version.cloud-tpus.google.com": "1.9" },
+          },
+          spec: {
+            containers: [
+              {
+                name: "tensorflow",
+                // Only the CPU image is used by this job.
+                image: updatedParams.cpuImage,
+                command: ["t2t-trainer"],
+                // Flags handed to t2t-trainer.
+                args: [
+                  "--model=" + updatedParams.model,
+                  "--hparams_set=" + updatedParams.hparamsSet,
+                  "--problem=" + updatedParams.problem,
+                  "--t2t_usr_dir=" + updatedParams.usrDir,
+                  "--train_steps=" + updatedParams.trainSteps,
+                  "--eval_steps=" + updatedParams.evalSteps,
+                  "--data_dir=" + updatedParams.dataDir,
+                  "--output_dir=" + updatedParams.outputGCSPath,
+                  "--use_tpu",
+                  "--master=" + updatedParams.tpuEndpoint,
+                ],
+                resources: {
+                  // TPU request sized by the `tpus` parameter.
+                  limits: {
+                    "cloud-tpus.google.com/v2": updatedParams.tpus,
+                  },
+                  requests: { memory: "1Gi" },
+                },
+              },
+            ],
+            restartPolicy: "OnFailure",
+          },  // spec
+        },  // template
+      },  // Master
+    },  // tfReplicaSpecs
+  },  // spec
+};  // tfjob
+
+k.core.v1.list.new([
+  tfjob,
+])
+
--- a/demos/yelp_demo/ks_app/components/ui.jsonnet
+++ b/demos/yelp_demo/ks_app/components/ui.jsonnet
@ -0,0 +1,7 @@
+local env = std.extVar("__ksonnet/environments");
+local params = std.extVar("__ksonnet/params").components.ui;
+local k = import "k.libsonnet";
+
+// Build the UI objects from ui.libsonnet and emit them as a pruned v1 List.
+local uiParts = (import "ui.libsonnet").parts(params, env);
+std.prune(k.core.v1.list.new(uiParts))
--- a/demos/yelp_demo/ks_app/components/ui.libsonnet
+++ b/demos/yelp_demo/ks_app/components/ui.libsonnet
@ -0,0 +1,102 @@
+{
+  // Returns the Kubernetes objects for the demo UI: a ClusterIP Service
+  // (annotated with an Ambassador mapping) and a single-replica Deployment.
+  //   params: component parameters; only `params.image` is read.
+  //   env:    ksonnet environment; only `env.namespace` is read.
+  parts(params, env):: [
+    // Service in front of the UI pods.
+    {
+      apiVersion: "v1",
+      kind: "Service",
+      metadata: {
+        name: "kubeflow-demo-ui",
+        namespace: env.namespace,
+        annotations: {
+          // Ambassador mapping, stored as a newline-joined YAML document.
+          "getambassador.io/config":
+            std.join("\n", [
+              "---",
+              "apiVersion: ambassador/v0",
+              "kind: Mapping",
+              "name: kubeflow_demo_ui",
+              "prefix: /kubeflow_demo/",
+              "rewrite: /",
+              "service: kubeflow-demo-ui:80",
+            ]),
+        },
+      },
+      spec: {
+        type: "ClusterIP",
+        selector: {
+          app: "kubeflow-demo-ui",
+        },
+        ports: [
+          {
+            port: 80,
+            targetPort: 80,
+          },
+        ],
+      },
+    },
+    // Deployment running the UI container.
+    {
+      apiVersion: "apps/v1beta1",
+      kind: "Deployment",
+      metadata: {
+        name: "kubeflow-demo-ui",
+        namespace: env.namespace,
+      },
+      spec: {
+        replicas: 1,
+        template: {
+          metadata: {
+            labels: {
+              app: "kubeflow-demo-ui",
+            },
+          },
+          spec: {
+            containers: [
+              {
+                name: "kubeflow-demo-ui",
+                image: params.image,
+                command: ["python"],
+                args: [
+                  "app.py",
+                  "--model_url",
+                  "http://serving:8000/model/serving:predict",
+                  "--data_dir",
+                  "gs://kubeflow-demo-base/featurization/yelp-data-1000000",
+                ],
+                ports: [
+                  { containerPort: 80 },
+                ],
+                // Points at the key file inside the gcp-credentials mount below.
+                env: [
+                  {
+                    name: "GOOGLE_APPLICATION_CREDENTIALS",
+                    value: "/secret/gcp-credentials/key.json",
+                  },
+                ],
+                volumeMounts: [
+                  {
+                    name: "gcp-credentials",
+                    mountPath: "/secret/gcp-credentials",
+                  },
+                ],
+              },
+            ],
+            imagePullSecrets: [
+              { name: "gcp-registry-credentials" },
+            ],
+            volumes: [
+              {
+                name: "gcp-credentials",
+                secret: { secretName: "gcp-credentials" },
+              },
+            ],
+          },
+        },
+      },
+    },
+  ],
+}