Add components (#402)

Replace files that were mistakenly removed in #376
This commit is contained in:
Michelle Casbon 2018-12-06 10:06:42 +11:00 committed by Kubernetes Prow Robot
parent fa1311833c
commit 5e395c1a88
6 changed files with 619 additions and 0 deletions

View File

@ -0,0 +1,20 @@
// ksonnet component "serving": renders the components exposed by the
// tf-serving library as a single pruned v1 List.
local env = std.extVar("__ksonnet/environments");
local params = std.extVar("__ksonnet/params").components.serving;
local k = import "k.libsonnet";
local servingLib = import "kubeflow/tf-serving/tf-serving.libsonnet";

// Environment fields (such as namespace) act as defaults; a component
// parameter with the same key wins. The name is re-applied last because
// ksonnet appears to require it to be a prototype parameter.
local mergedParams = env + params + { name: params.name };

// Layer the user-supplied parameters over the library's own params.
local serving = servingLib {
  params+: mergedParams,
};

std.prune(k.core.v1.list.new(serving.components))

View File

@ -0,0 +1,198 @@
// ksonnet component "t2tcpu": emits one TFJob (kubeflow.org/v1alpha2) whose
// Master, Worker and Ps replicas all run yelp_sentiment/worker_launcher.sh.
local env = std.extVar("__ksonnet/environments");
local params = std.extVar("__ksonnet/params").components["t2tcpu"];
local k = import "k.libsonnet";

local namespace = env.namespace;

// Built-in defaults; any field present in the component params replaces the
// default of the same name.
local updatedParams = {
  cloud: "gke",
  sync: "0",
  dataDir: "gs://kubeflow-demo-base/featurization/yelp-data",
  usrDir: "./yelp_sentiment",
  problem: "yelp_sentiment",
  model: "transformer_encoder",
  hparams: "transformer_yelp_sentiment",
  // NOTE(review): hparamsSet is never read in this component; --hparams_set
  // below is built from `hparams`. Confirm which field is meant to drive it.
  hparamsSet: "transformer_yelp_sentiment",
  outputGCSPath: "gs://kubeflow-demo-base/kubeflow-demo-base-demo/CPU/training/yelp-model",
  gpuImage: "gcr.io/kubeflow-demo-base/kubeflow-yelp-demo-gpu:latest",
  cpuImage: "gcr.io/kubeflow-demo-base/kubeflow-yelp-demo-cpu:latest",
  trainSteps: 1000,
  evalSteps: 10,
  psGpu: 0,
  workerGpu: 0,
  workers: 3,
  masters: 1,
  ps: 1,
  jobName: "t2tcpu",
} + params;

// Flags shared by every replica type.
local baseCommand = [
  "bash",
  "/home/jovyan/yelp_sentiment/worker_launcher.sh",
  "--train_steps=" + updatedParams.trainSteps,
  "--hparams_set=" + updatedParams.hparams,
  "--model=" + updatedParams.model,
  "--problem=" + updatedParams.problem,
  "--t2t_usr_dir=" + updatedParams.usrDir,
  "--data_dir=" + updatedParams.dataDir,
  "--output_dir=" + updatedParams.outputGCSPath,
];

// Parameter servers only get the shared flags plus the server schedule.
local psCommand = baseCommand + [
  "--schedule=run_std_server",
];

// --worker_replicas counts workers and masters together.
local totalWorkerReplicas = updatedParams.workers + updatedParams.masters;

// Flags shared by the Master and Worker replicas.
local workerBaseCommand = baseCommand + [
  "--schedule=train",
  "--sync=" + updatedParams.sync,
  "--ps_gpu=" + updatedParams.psGpu,
  "--worker_gpu=" + updatedParams.workerGpu,
  "--worker_replicas=" + totalWorkerReplicas,
  "--ps_replicas=" + updatedParams.ps,
  "--eval_steps=" + updatedParams.evalSteps,
];

local workerCommand = workerBaseCommand + [
  "--worker_job=/job:worker",
];

local masterCommand = workerBaseCommand + [
  "--worker_job=/job:master",
];

// GPU limit attached to Master/Worker containers when workerGpu > 0.
local gpuResources = {
  limits: {
    "nvidia.com/gpu": updatedParams.workerGpu,
  },
};

local cloud = std.toString(updatedParams.cloud);

// Any cloud other than "gke" gets the credential env var, the secret
// volume/mount and the image pull secret defined below.
local nonGke = cloud != "gke";

local baseEnv = [
  {
    name: "PYTHONPATH",
    value: "/home/jovyan",
  },
];

local nonGkeEnv = baseEnv + [
  {
    name: "GOOGLE_APPLICATION_CREDENTIALS",
    value: "/secret/gcp-credentials/key.json",
  },
];

local nonGkeVolumes = [
  {
    name: "gcp-credentials",
    secret: {
      secretName: "gcp-credentials",
    },
  },
];

local nonGkeImagePullSecrets = [
  {
    name: "gcp-registry-credentials",
  },
];

local nonGkeVolumeMounts = [
  {
    mountPath: "/secret/gcp-credentials",
    name: "gcp-credentials",
  },
];

// Builds one tfReplicaSpecs entry.
//   replicas:   number of pods for this replica type.
//   command:    container command line.
//   gpuCapable: when true, the container switches to gpuImage and gets
//               gpuResources if workerGpu > 0; when false it always uses
//               cpuImage and workerGpu is not consulted.
local replicaSpec(replicas, command, gpuCapable) =
  local useGpu = gpuCapable && updatedParams.workerGpu > 0;
  {
    replicas: replicas,
    template: {
      spec: {
        containers: [
          {
            command: command,
            env: if nonGke then nonGkeEnv else baseEnv,
            image: if useGpu then updatedParams.gpuImage else updatedParams.cpuImage,
            name: "tensorflow",
            // A computed field name that evaluates to null omits the field.
            [if useGpu then "resources"]: gpuResources,
            [if nonGke then "volumeMounts"]: nonGkeVolumeMounts,
          },
        ],
        [if nonGke then "imagePullSecrets"]: nonGkeImagePullSecrets,
        restartPolicy: "OnFailure",
        [if nonGke then "volumes"]: nonGkeVolumes,
      },
    },
  };

local tfjob = {
  apiVersion: "kubeflow.org/v1alpha2",
  kind: "TFJob",
  metadata: {
    name: updatedParams.jobName,
    namespace: namespace,
  },
  spec: {
    tfReplicaSpecs: {
      // The Master count is fixed at 1; `masters` only feeds --worker_replicas.
      Master: replicaSpec(1, masterCommand, true),
      Worker: replicaSpec(updatedParams.workers, workerCommand, true),
      // Parameter servers always run the CPU image without GPU limits.
      Ps: replicaSpec(updatedParams.ps, psCommand, false),
    },  // tfReplicaSpecs
  },  // Spec
};  // tfJob

k.core.v1.list.new([
  tfjob,
])

View File

@ -0,0 +1,197 @@
// ksonnet component "t2tgpu": emits one TFJob (kubeflow.org/v1alpha2) whose
// Master, Worker and Ps replicas all run yelp_sentiment/worker_launcher.sh.
// With the default workerGpu of 1, Master and Worker use the GPU image.
local env = std.extVar("__ksonnet/environments");
local params = std.extVar("__ksonnet/params").components["t2tgpu"];
local k = import "k.libsonnet";

local namespace = env.namespace;

// Built-in defaults; any field present in the component params replaces the
// default of the same name.
local updatedParams = {
  cloud: "gke",
  sync: "0",
  dataDir: "gs://kubeflow-demo-base/featurization/yelp-data",
  usrDir: "./yelp_sentiment",
  problem: "yelp_sentiment",
  model: "transformer_encoder",
  hparams: "transformer_yelp_sentiment",
  // NOTE(review): hparamsSet is never read in this component; --hparams_set
  // below is built from `hparams`. Confirm which field is meant to drive it.
  hparamsSet: "transformer_yelp_sentiment",
  outputGCSPath: "gs://kubeflow-demo-base/kubeflow-demo-base-demo/GPU/training/yelp-model",
  gpuImage: "gcr.io/kubeflow-demo-base/kubeflow-yelp-demo-gpu:latest",
  cpuImage: "gcr.io/kubeflow-demo-base/kubeflow-yelp-demo-cpu:latest",
  trainSteps: 1000,
  evalSteps: 10,
  psGpu: 0,
  workerGpu: 1,
  workers: 3,
  masters: 1,
  ps: 1,
  jobName: "t2tgpu",
} + params;

// Flags shared by every replica type.
local baseCommand = [
  "bash",
  "/home/jovyan/yelp_sentiment/worker_launcher.sh",
  "--train_steps=" + updatedParams.trainSteps,
  "--hparams_set=" + updatedParams.hparams,
  "--model=" + updatedParams.model,
  "--problem=" + updatedParams.problem,
  "--t2t_usr_dir=" + updatedParams.usrDir,
  "--data_dir=" + updatedParams.dataDir,
  "--output_dir=" + updatedParams.outputGCSPath,
];

// Parameter servers only get the shared flags plus the server schedule.
local psCommand = baseCommand + [
  "--schedule=run_std_server",
];

// --worker_replicas counts workers and masters together.
local totalWorkerReplicas = updatedParams.workers + updatedParams.masters;

// Flags shared by the Master and Worker replicas.
local workerBaseCommand = baseCommand + [
  "--schedule=train",
  "--sync=" + updatedParams.sync,
  "--ps_gpu=" + updatedParams.psGpu,
  "--worker_gpu=" + updatedParams.workerGpu,
  "--worker_replicas=" + totalWorkerReplicas,
  "--ps_replicas=" + updatedParams.ps,
  "--eval_steps=" + updatedParams.evalSteps,
];

local workerCommand = workerBaseCommand + [
  "--worker_job=/job:worker",
];

local masterCommand = workerBaseCommand + [
  "--worker_job=/job:master",
];

// GPU limit attached to Master/Worker containers when workerGpu > 0.
local gpuResources = {
  limits: {
    "nvidia.com/gpu": updatedParams.workerGpu,
  },
};

local cloud = std.toString(updatedParams.cloud);

// Any cloud other than "gke" gets the credential env var, the secret
// volume/mount and the image pull secret defined below.
local nonGke = cloud != "gke";

local baseEnv = [
  {
    name: "PYTHONPATH",
    value: "/home/jovyan",
  },
];

local nonGkeEnv = baseEnv + [
  {
    name: "GOOGLE_APPLICATION_CREDENTIALS",
    value: "/secret/gcp-credentials/key.json",
  },
];

local nonGkeVolumes = [
  {
    name: "gcp-credentials",
    secret: {
      secretName: "gcp-credentials",
    },
  },
];

local nonGkeImagePullSecrets = [
  {
    name: "gcp-registry-credentials",
  },
];

local nonGkeVolumeMounts = [
  {
    mountPath: "/secret/gcp-credentials",
    name: "gcp-credentials",
  },
];

// Builds one tfReplicaSpecs entry.
//   replicas:   number of pods for this replica type.
//   command:    container command line.
//   gpuCapable: when true, the container switches to gpuImage and gets
//               gpuResources if workerGpu > 0; when false it always uses
//               cpuImage and workerGpu is not consulted.
local replicaSpec(replicas, command, gpuCapable) =
  local useGpu = gpuCapable && updatedParams.workerGpu > 0;
  {
    replicas: replicas,
    template: {
      spec: {
        containers: [
          {
            command: command,
            env: if nonGke then nonGkeEnv else baseEnv,
            image: if useGpu then updatedParams.gpuImage else updatedParams.cpuImage,
            name: "tensorflow",
            // A computed field name that evaluates to null omits the field.
            [if useGpu then "resources"]: gpuResources,
            [if nonGke then "volumeMounts"]: nonGkeVolumeMounts,
          },
        ],
        [if nonGke then "imagePullSecrets"]: nonGkeImagePullSecrets,
        restartPolicy: "OnFailure",
        [if nonGke then "volumes"]: nonGkeVolumes,
      },
    },
  };

local tfjob = {
  apiVersion: "kubeflow.org/v1alpha2",
  kind: "TFJob",
  metadata: {
    name: updatedParams.jobName,
    namespace: namespace,
  },
  spec: {
    tfReplicaSpecs: {
      // The Master count is fixed at 1; `masters` only feeds --worker_replicas.
      Master: replicaSpec(1, masterCommand, true),
      Worker: replicaSpec(updatedParams.workers, workerCommand, true),
      // Parameter servers always run the CPU image without GPU limits.
      Ps: replicaSpec(updatedParams.ps, psCommand, false),
    },  // tfReplicaSpecs
  },  // Spec
};  // tfJob

k.core.v1.list.new([
  tfjob,
])

View File

@ -0,0 +1,95 @@
// ksonnet component "t2ttpu": emits one TFJob (kubeflow.org/v1alpha2) with a
// single Master replica that runs t2t-trainer with --use_tpu.
local env = std.extVar("__ksonnet/environments");
local params = std.extVar("__ksonnet/params").components["t2ttpu"];
local k = import "k.libsonnet";

local namespace = env.namespace;

// Built-in defaults; any field present in the component params replaces the
// default of the same name. cloud, hparams and gpuImage are kept as defaults
// but are not read anywhere in this component.
local updatedParams = {
  cloud: "gke",
  dataDir: "gs://kubeflow-demo-base/featurization/yelp-data",
  usrDir: "./yelp_sentiment",
  problem: "yelp_sentiment",
  model: "transformer_encoder",
  hparams: "transformer_yelp_sentiment",
  hparamsSet: "transformer_yelp_sentiment",
  outputGCSPath: "gs://kubeflow-demo-base/training/yelp-model-TPU",
  cpuImage: "gcr.io/kubeflow-demo-base/kubeflow-yelp-demo-cpu:latest",
  gpuImage: "gcr.io/kubeflow-demo-base/kubeflow-yelp-demo-gpu:latest",
  trainSteps: 1000,
  evalSteps: 10,
  tpus: 8,
  jobName: "t2ttpu",
  // NOTE(review): presumably expanded by Kubernetes from an environment
  // variable injected into Cloud TPU pods; confirm against the cluster setup.
  tpuEndpoint: "$(KUBE_GOOGLE_CLOUD_TPU_ENDPOINTS)",
} + params;

// Arguments passed to the t2t-trainer command.
local trainerArgs = [
  "--model=" + updatedParams.model,
  "--hparams_set=" + updatedParams.hparamsSet,
  "--problem=" + updatedParams.problem,
  "--t2t_usr_dir=" + updatedParams.usrDir,
  "--train_steps=" + updatedParams.trainSteps,
  "--eval_steps=" + updatedParams.evalSteps,
  "--data_dir=" + updatedParams.dataDir,
  "--output_dir=" + updatedParams.outputGCSPath,
  "--use_tpu",
  "--master=" + updatedParams.tpuEndpoint,
];

// TPU limit taken from the `tpus` parameter, plus a fixed memory request.
local tpuResources = {
  limits: {
    "cloud-tpus.google.com/v2": updatedParams.tpus,
  },
  requests: {
    memory: "1Gi",
  },
};

local tfjob = {
  apiVersion: "kubeflow.org/v1alpha2",
  kind: "TFJob",
  metadata: {
    name: updatedParams.jobName,
    namespace: namespace,
  },
  spec: {
    tfReplicaSpecs: {
      Master: {
        replicas: 1,
        template: {
          metadata: {
            annotations: {
              "tf-version.cloud-tpus.google.com": "1.9",
            },
          },
          spec: {
            containers: [
              {
                args: trainerArgs,
                command: [
                  "t2t-trainer",
                ],
                // The trainer container uses the CPU image.
                image: updatedParams.cpuImage,
                name: "tensorflow",
                resources: tpuResources,
              },
            ],
            restartPolicy: "OnFailure",
          },  // spec
        },  // template
      },  // Master
    },  // tfReplicaSpecs
  },  // Spec
};  // tfJob

k.core.v1.list.new([
  tfjob,
])

View File

@ -0,0 +1,7 @@
// ksonnet component "ui": wraps the resources returned by ui.libsonnet's
// parts(params, env) in a v1 List and prunes the result.
local env = std.extVar("__ksonnet/environments");
local params = std.extVar("__ksonnet/params").components.ui;
local k = import "k.libsonnet";
local ui = import "ui.libsonnet";

local uiResources = ui.parts(params, env);
local resourceList = k.core.v1.list.new(uiResources);

std.prune(resourceList)

View File

@ -0,0 +1,102 @@
// Resources for the Kubeflow demo UI: a ClusterIP Service carrying an
// Ambassador Mapping annotation, and a single-replica Deployment.
local appName = "kubeflow-demo-ui";
local credentialsName = "gcp-credentials";

// Ambassador routing document stored on the Service, one entry per line.
local ambassadorMapping = std.join("\n", [
  "---",
  "apiVersion: ambassador/v0",
  "kind: Mapping",
  "name: kubeflow_demo_ui",
  "prefix: /kubeflow_demo/",
  "rewrite: /",
  "service: kubeflow-demo-ui:80",
]);

// Service on port 80 selecting pods labelled app=kubeflow-demo-ui.
local uiService(env) = {
  apiVersion: "v1",
  kind: "Service",
  metadata: {
    name: appName,
    namespace: env.namespace,
    annotations: {
      "getambassador.io/config": ambassadorMapping,
    },
  },
  spec: {
    ports: [
      { port: 80, targetPort: 80 },
    ],
    selector: { app: appName },
    type: "ClusterIP",
  },
};

// Container running `python app.py` with a fixed model URL and data dir.
// The GCP credentials secret is mounted and referenced through
// GOOGLE_APPLICATION_CREDENTIALS.
local uiContainer(params) = {
  args: [
    "app.py",
    "--model_url",
    "http://serving:8000/model/serving:predict",
    "--data_dir",
    "gs://kubeflow-demo-base/featurization/yelp-data-1000000",
  ],
  command: ["python"],
  image: params.image,
  name: appName,
  ports: [
    { containerPort: 80 },
  ],
  env: [
    {
      name: "GOOGLE_APPLICATION_CREDENTIALS",
      value: "/secret/gcp-credentials/key.json",
    },
  ],
  volumeMounts: [
    {
      mountPath: "/secret/gcp-credentials",
      name: credentialsName,
    },
  ],
};

// Single-replica Deployment whose pod template carries the app label the
// Service selects on.
local uiDeployment(params, env) = {
  apiVersion: "apps/v1beta1",
  kind: "Deployment",
  metadata: {
    name: appName,
    namespace: env.namespace,
  },
  spec: {
    replicas: 1,
    template: {
      metadata: {
        labels: { app: appName },
      },
      spec: {
        containers: [uiContainer(params)],
        imagePullSecrets: [
          { name: "gcp-registry-credentials" },
        ],
        volumes: [
          {
            name: credentialsName,
            secret: { secretName: "gcp-credentials" },
          },
        ],
      },
    },
  },
};

{
  // Returns the UI resources as a list: [Service, Deployment].
  //   params: component parameters; only `image` is read.
  //   env:    ksonnet environment; only `namespace` is read.
  parts(params, env):: [
    uiService(env),
    uiDeployment(params, env),
  ],
}