mirror of https://github.com/kubeflow/examples.git
Add components (#402)
Replace files that were mistakenly removed in #376
This commit is contained in:
parent
fa1311833c
commit
5e395c1a88
|
@ -0,0 +1,20 @@
|
|||
local env = std.extVar("__ksonnet/environments");
|
||||
local params = std.extVar("__ksonnet/params").components.serving;
|
||||
local k = import "k.libsonnet";
|
||||
|
||||
// ksonnet appears to require that name be a parameter of the prototype, which is why we handle it differently.
|
||||
local name = params.name;
|
||||
|
||||
// updatedParams includes the namespace from env by default.
|
||||
// We can override namespace in params if needed
|
||||
local updatedParams = env + params;
|
||||
|
||||
local tfServingBase = import "kubeflow/tf-serving/tf-serving.libsonnet";
|
||||
local tfServing = tfServingBase {
|
||||
// Override parameters with user supplied parameters.
|
||||
params+: updatedParams {
|
||||
name: name,
|
||||
},
|
||||
};
|
||||
|
||||
std.prune(k.core.v1.list.new(tfServing.components))
|
|
@ -0,0 +1,198 @@
|
|||
local env = std.extVar("__ksonnet/environments");
|
||||
local params = std.extVar("__ksonnet/params").components["t2tcpu"];
|
||||
|
||||
local k = import "k.libsonnet";
|
||||
|
||||
local name = params.name;
|
||||
local namespace = env.namespace;
|
||||
|
||||
local updatedParams = {
|
||||
cloud: "gke",
|
||||
|
||||
sync: "0",
|
||||
|
||||
dataDir: "gs://kubeflow-demo-base/featurization/yelp-data",
|
||||
usrDir: "./yelp_sentiment",
|
||||
problem: "yelp_sentiment",
|
||||
|
||||
model: "transformer_encoder",
|
||||
hparams: "transformer_yelp_sentiment",
|
||||
hparamsSet: "transformer_yelp_sentiment",
|
||||
|
||||
outputGCSPath: "gs://kubeflow-demo-base/kubeflow-demo-base-demo/CPU/training/yelp-model",
|
||||
|
||||
gpuImage: "gcr.io/kubeflow-demo-base/kubeflow-yelp-demo-gpu:latest",
|
||||
cpuImage: "gcr.io/kubeflow-demo-base/kubeflow-yelp-demo-cpu:latest",
|
||||
|
||||
trainSteps: 1000,
|
||||
evalSteps: 10,
|
||||
|
||||
psGpu: 0,
|
||||
workerGpu: 0,
|
||||
|
||||
workers: 3,
|
||||
masters: 1,
|
||||
ps: 1,
|
||||
|
||||
jobName: "t2tcpu",
|
||||
} + params;
|
||||
|
||||
// Name of the registered hyperparameter set sent as --hparams_set.
// `hparamsSet` is declared in the defaults above but was never read in this
// component; the flag was fed from `hparams` instead. Honour an explicit
// `hparamsSet` override from the component params, and otherwise keep reading
// `hparams` so existing overrides behave exactly as before.
local hparamsSetName =
  if std.objectHas(params, "hparamsSet") then params.hparamsSet
  else updatedParams.hparams;

// Arguments common to every replica: psCommand and workerBaseCommand both
// extend this list with their role-specific flags.
local baseCommand = [
  "bash",
  "/home/jovyan/yelp_sentiment/worker_launcher.sh",
  // Numeric params are appended to the flag text by Jsonnet's string `+`.
  "--train_steps=" + updatedParams.trainSteps,
  "--hparams_set=" + hparamsSetName,
  "--model=" + updatedParams.model,
  "--problem=" + updatedParams.problem,
  "--t2t_usr_dir=" + updatedParams.usrDir,
  "--data_dir=" + updatedParams.dataDir,
  "--output_dir=" + updatedParams.outputGCSPath,
];
|
||||
|
||||
local psCommand = baseCommand + [
|
||||
"--schedule=run_std_server",
|
||||
];
|
||||
|
||||
local totalWorkerReplicas = updatedParams.workers + updatedParams.masters;
|
||||
|
||||
local workerBaseCommand = baseCommand + [
|
||||
"--schedule=train",
|
||||
"--sync=" + updatedParams.sync,
|
||||
"--ps_gpu=" + updatedParams.psGpu,
|
||||
"--worker_gpu=" + updatedParams.workerGpu,
|
||||
"--worker_replicas=" + totalWorkerReplicas,
|
||||
"--ps_replicas=" + updatedParams.ps,
|
||||
"--eval_steps=" + updatedParams.evalSteps,
|
||||
];
|
||||
|
||||
local workerCommand = workerBaseCommand + [
|
||||
"--worker_job=/job:worker",
|
||||
];
|
||||
|
||||
local masterCommand = workerBaseCommand + [
|
||||
"--worker_job=/job:master",
|
||||
];
|
||||
|
||||
local gpuResources = {
|
||||
limits: {
|
||||
"nvidia.com/gpu": updatedParams.workerGpu,
|
||||
},
|
||||
};
|
||||
|
||||
local cloud = std.toString(updatedParams.cloud);
|
||||
|
||||
local baseEnv = [
|
||||
{
|
||||
name: "PYTHONPATH",
|
||||
value: "/home/jovyan",
|
||||
},
|
||||
];
|
||||
|
||||
local nonGkeEnv = baseEnv + [
|
||||
{
|
||||
name: "GOOGLE_APPLICATION_CREDENTIALS",
|
||||
value: "/secret/gcp-credentials/key.json"
|
||||
},
|
||||
];
|
||||
|
||||
local nonGkeVolumes = [
|
||||
{
|
||||
name: "gcp-credentials",
|
||||
secret: {
|
||||
secretName: "gcp-credentials",
|
||||
},
|
||||
},
|
||||
];
|
||||
|
||||
local nonGkeImagePullSecrets = [
|
||||
{
|
||||
name: "gcp-registry-credentials",
|
||||
},
|
||||
];
|
||||
|
||||
local nonGkeVolumeMounts = [
|
||||
{
|
||||
mountPath: "/secret/gcp-credentials",
|
||||
name: "gcp-credentials",
|
||||
},
|
||||
];
|
||||
|
||||
local tfjob = {
|
||||
apiVersion: "kubeflow.org/v1alpha2",
|
||||
kind: "TFJob",
|
||||
metadata: {
|
||||
name: updatedParams.jobName,
|
||||
namespace: namespace,
|
||||
},
|
||||
spec: {
|
||||
tfReplicaSpecs: {
|
||||
Master: {
|
||||
replicas: 1,
|
||||
template: {
|
||||
spec: {
|
||||
containers: [
|
||||
{
|
||||
command: masterCommand,
|
||||
env: if cloud != "gke" then nonGkeEnv else baseEnv,
|
||||
image: if updatedParams.workerGpu > 0 then updatedParams.gpuImage else updatedParams.cpuImage,
|
||||
name: "tensorflow",
|
||||
[if updatedParams.workerGpu > 0 then "resources"]: gpuResources,
|
||||
[if cloud != "gke" then "volumeMounts"]: nonGkeVolumeMounts,
|
||||
},
|
||||
],
|
||||
[if cloud != "gke" then "imagePullSecrets"]: nonGkeImagePullSecrets,
|
||||
restartPolicy: "OnFailure",
|
||||
[if cloud != "gke" then "volumes"]: nonGkeVolumes,
|
||||
},
|
||||
},
|
||||
}, // Master
|
||||
|
||||
Worker: {
|
||||
replicas: updatedParams.workers,
|
||||
template: {
|
||||
spec: {
|
||||
containers: [
|
||||
{
|
||||
command: workerCommand,
|
||||
env: if cloud != "gke" then nonGkeEnv else baseEnv,
|
||||
image: if updatedParams.workerGpu > 0 then updatedParams.gpuImage else updatedParams.cpuImage,
|
||||
name: "tensorflow",
|
||||
[if updatedParams.workerGpu > 0 then "resources"]: gpuResources,
|
||||
[if cloud != "gke" then "volumeMounts"]: nonGkeVolumeMounts,
|
||||
},
|
||||
],
|
||||
[if cloud != "gke" then "imagePullSecrets"]: nonGkeImagePullSecrets,
|
||||
restartPolicy: "OnFailure",
|
||||
[if cloud != "gke" then "volumes"]: nonGkeVolumes,
|
||||
},
|
||||
},
|
||||
}, // Worker
|
||||
Ps: {
|
||||
replicas: updatedParams.ps,
|
||||
template: {
|
||||
spec: {
|
||||
containers: [
|
||||
{
|
||||
command: psCommand,
|
||||
env: if cloud != "gke" then nonGkeEnv else baseEnv,
|
||||
image: updatedParams.cpuImage,
|
||||
name: "tensorflow",
|
||||
[if cloud != "gke" then "volumeMounts"]: nonGkeVolumeMounts,
|
||||
},
|
||||
],
|
||||
[if cloud != "gke" then "imagePullSecrets"]: nonGkeImagePullSecrets,
|
||||
restartPolicy: "OnFailure",
|
||||
[if cloud != "gke" then "volumes"]: nonGkeVolumes,
|
||||
},
|
||||
},
|
||||
}, // Ps
|
||||
}, // tfReplicaSpecs
|
||||
}, // Spec
|
||||
}; // tfJob
|
||||
|
||||
k.core.v1.list.new([
|
||||
tfjob,
|
||||
])
|
||||
|
||||
|
||||
|
|
@ -0,0 +1,197 @@
|
|||
local env = std.extVar("__ksonnet/environments");
|
||||
local params = std.extVar("__ksonnet/params").components["t2tgpu"];
|
||||
|
||||
local k = import "k.libsonnet";
|
||||
|
||||
local name = params.name;
|
||||
local namespace = env.namespace;
|
||||
|
||||
local updatedParams = {
|
||||
cloud: "gke",
|
||||
sync: "0",
|
||||
|
||||
dataDir: "gs://kubeflow-demo-base/featurization/yelp-data",
|
||||
usrDir: "./yelp_sentiment",
|
||||
problem: "yelp_sentiment",
|
||||
|
||||
model: "transformer_encoder",
|
||||
hparams: "transformer_yelp_sentiment",
|
||||
hparamsSet: "transformer_yelp_sentiment",
|
||||
|
||||
outputGCSPath: "gs://kubeflow-demo-base/kubeflow-demo-base-demo/GPU/training/yelp-model",
|
||||
|
||||
gpuImage: "gcr.io/kubeflow-demo-base/kubeflow-yelp-demo-gpu:latest",
|
||||
cpuImage: "gcr.io/kubeflow-demo-base/kubeflow-yelp-demo-cpu:latest",
|
||||
|
||||
trainSteps: 1000,
|
||||
evalSteps: 10,
|
||||
|
||||
psGpu: 0,
|
||||
workerGpu: 1,
|
||||
|
||||
workers: 3,
|
||||
masters: 1,
|
||||
ps: 1,
|
||||
|
||||
jobName: "t2tgpu",
|
||||
} + params;
|
||||
|
||||
// Name of the registered hyperparameter set sent as --hparams_set.
// `hparamsSet` is declared in the defaults above but was never read in this
// component; the flag was fed from `hparams` instead. Honour an explicit
// `hparamsSet` override from the component params, and otherwise keep reading
// `hparams` so existing overrides behave exactly as before.
local hparamsSetName =
  if std.objectHas(params, "hparamsSet") then params.hparamsSet
  else updatedParams.hparams;

// Arguments common to every replica: psCommand and workerBaseCommand both
// extend this list with their role-specific flags.
local baseCommand = [
  "bash",
  "/home/jovyan/yelp_sentiment/worker_launcher.sh",
  // Numeric params are appended to the flag text by Jsonnet's string `+`.
  "--train_steps=" + updatedParams.trainSteps,
  "--hparams_set=" + hparamsSetName,
  "--model=" + updatedParams.model,
  "--problem=" + updatedParams.problem,
  "--t2t_usr_dir=" + updatedParams.usrDir,
  "--data_dir=" + updatedParams.dataDir,
  "--output_dir=" + updatedParams.outputGCSPath,
];
|
||||
|
||||
local psCommand = baseCommand + [
|
||||
"--schedule=run_std_server",
|
||||
];
|
||||
|
||||
local totalWorkerReplicas = updatedParams.workers + updatedParams.masters;
|
||||
|
||||
local workerBaseCommand = baseCommand + [
|
||||
"--schedule=train",
|
||||
"--sync=" + updatedParams.sync,
|
||||
"--ps_gpu=" + updatedParams.psGpu,
|
||||
"--worker_gpu=" + updatedParams.workerGpu,
|
||||
"--worker_replicas=" + totalWorkerReplicas,
|
||||
"--ps_replicas=" + updatedParams.ps,
|
||||
"--eval_steps=" + updatedParams.evalSteps,
|
||||
];
|
||||
|
||||
local workerCommand = workerBaseCommand + [
|
||||
"--worker_job=/job:worker",
|
||||
];
|
||||
|
||||
local masterCommand = workerBaseCommand + [
|
||||
"--worker_job=/job:master",
|
||||
];
|
||||
|
||||
local gpuResources = {
|
||||
limits: {
|
||||
"nvidia.com/gpu": updatedParams.workerGpu,
|
||||
},
|
||||
};
|
||||
|
||||
local cloud = std.toString(updatedParams.cloud);
|
||||
|
||||
local baseEnv = [
|
||||
{
|
||||
name: "PYTHONPATH",
|
||||
value: "/home/jovyan",
|
||||
},
|
||||
];
|
||||
|
||||
local nonGkeEnv = baseEnv + [
|
||||
{
|
||||
name: "GOOGLE_APPLICATION_CREDENTIALS",
|
||||
value: "/secret/gcp-credentials/key.json"
|
||||
},
|
||||
];
|
||||
|
||||
local nonGkeVolumes = [
|
||||
{
|
||||
name: "gcp-credentials",
|
||||
secret: {
|
||||
secretName: "gcp-credentials",
|
||||
},
|
||||
},
|
||||
];
|
||||
|
||||
local nonGkeImagePullSecrets = [
|
||||
{
|
||||
name: "gcp-registry-credentials",
|
||||
},
|
||||
];
|
||||
|
||||
local nonGkeVolumeMounts = [
|
||||
{
|
||||
mountPath: "/secret/gcp-credentials",
|
||||
name: "gcp-credentials",
|
||||
},
|
||||
];
|
||||
|
||||
local tfjob = {
|
||||
apiVersion: "kubeflow.org/v1alpha2",
|
||||
kind: "TFJob",
|
||||
metadata: {
|
||||
name: updatedParams.jobName,
|
||||
namespace: namespace,
|
||||
},
|
||||
spec: {
|
||||
tfReplicaSpecs: {
|
||||
Master: {
|
||||
replicas: 1,
|
||||
template: {
|
||||
spec: {
|
||||
containers: [
|
||||
{
|
||||
command: masterCommand,
|
||||
env: if cloud != "gke" then nonGkeEnv else baseEnv,
|
||||
image: if updatedParams.workerGpu > 0 then updatedParams.gpuImage else updatedParams.cpuImage,
|
||||
name: "tensorflow",
|
||||
[if updatedParams.workerGpu > 0 then "resources"]: gpuResources,
|
||||
[if cloud != "gke" then "volumeMounts"]: nonGkeVolumeMounts,
|
||||
},
|
||||
],
|
||||
[if cloud != "gke" then "imagePullSecrets"]: nonGkeImagePullSecrets,
|
||||
restartPolicy: "OnFailure",
|
||||
[if cloud != "gke" then "volumes"]: nonGkeVolumes,
|
||||
},
|
||||
},
|
||||
}, // Master
|
||||
|
||||
Worker: {
|
||||
replicas: updatedParams.workers,
|
||||
template: {
|
||||
spec: {
|
||||
containers: [
|
||||
{
|
||||
command: workerCommand,
|
||||
env: if cloud != "gke" then nonGkeEnv else baseEnv,
|
||||
image: if updatedParams.workerGpu > 0 then updatedParams.gpuImage else updatedParams.cpuImage,
|
||||
name: "tensorflow",
|
||||
[if updatedParams.workerGpu > 0 then "resources"]: gpuResources,
|
||||
[if cloud != "gke" then "volumeMounts"]: nonGkeVolumeMounts,
|
||||
},
|
||||
],
|
||||
[if cloud != "gke" then "imagePullSecrets"]: nonGkeImagePullSecrets,
|
||||
restartPolicy: "OnFailure",
|
||||
[if cloud != "gke" then "volumes"]: nonGkeVolumes,
|
||||
},
|
||||
},
|
||||
}, // Worker
|
||||
Ps: {
|
||||
replicas: updatedParams.ps,
|
||||
template: {
|
||||
spec: {
|
||||
containers: [
|
||||
{
|
||||
command: psCommand,
|
||||
env: if cloud != "gke" then nonGkeEnv else baseEnv,
|
||||
image: updatedParams.cpuImage,
|
||||
name: "tensorflow",
|
||||
[if cloud != "gke" then "volumeMounts"]: nonGkeVolumeMounts,
|
||||
},
|
||||
],
|
||||
[if cloud != "gke" then "imagePullSecrets"]: nonGkeImagePullSecrets,
|
||||
restartPolicy: "OnFailure",
|
||||
[if cloud != "gke" then "volumes"]: nonGkeVolumes,
|
||||
},
|
||||
},
|
||||
}, // Ps
|
||||
}, // tfReplicaSpecs
|
||||
}, // Spec
|
||||
}; // tfJob
|
||||
|
||||
k.core.v1.list.new([
|
||||
tfjob,
|
||||
])
|
||||
|
||||
|
||||
|
|
@ -0,0 +1,95 @@
|
|||
local env = std.extVar("__ksonnet/environments");
|
||||
local params = std.extVar("__ksonnet/params").components["t2ttpu"];
|
||||
|
||||
local k = import "k.libsonnet";
|
||||
|
||||
local name = params.name;
|
||||
local namespace = env.namespace;
|
||||
|
||||
local updatedParams = {
|
||||
cloud: "gke",
|
||||
|
||||
dataDir: "gs://kubeflow-demo-base/featurization/yelp-data",
|
||||
usrDir: "./yelp_sentiment",
|
||||
problem: "yelp_sentiment",
|
||||
|
||||
model: "transformer_encoder",
|
||||
hparams: "transformer_yelp_sentiment",
|
||||
hparamsSet: "transformer_yelp_sentiment",
|
||||
|
||||
outputGCSPath: "gs://kubeflow-demo-base/training/yelp-model-TPU",
|
||||
|
||||
cpuImage: "gcr.io/kubeflow-demo-base/kubeflow-yelp-demo-cpu:latest",
|
||||
gpuImage: "gcr.io/kubeflow-demo-base/kubeflow-yelp-demo-gpu:latest",
|
||||
|
||||
trainSteps: 1000,
|
||||
evalSteps: 10,
|
||||
|
||||
tpus: 8,
|
||||
|
||||
jobName: "t2ttpu",
|
||||
|
||||
tpuEndpoint: "$(KUBE_GOOGLE_CLOUD_TPU_ENDPOINTS)",
|
||||
} + params;
|
||||
|
||||
local cloud = std.toString(updatedParams.cloud);
|
||||
|
||||
local tfjob = {
|
||||
apiVersion: "kubeflow.org/v1alpha2",
|
||||
kind: "TFJob",
|
||||
metadata: {
|
||||
name: updatedParams.jobName,
|
||||
namespace: namespace,
|
||||
},
|
||||
spec: {
|
||||
tfReplicaSpecs: {
|
||||
Master: {
|
||||
replicas: 1,
|
||||
template: {
|
||||
metadata: {
|
||||
annotations: {
|
||||
"tf-version.cloud-tpus.google.com": "1.9",
|
||||
},
|
||||
},
|
||||
spec: {
|
||||
containers: [
|
||||
{
|
||||
args: [
|
||||
"--model=" + updatedParams.model,
|
||||
"--hparams_set=" + updatedParams.hparamsSet,
|
||||
"--problem=" + updatedParams.problem,
|
||||
"--t2t_usr_dir=" + updatedParams.usrDir,
|
||||
"--train_steps=" + updatedParams.trainSteps,
|
||||
"--eval_steps=" + updatedParams.evalSteps,
|
||||
"--data_dir=" + updatedParams.dataDir,
|
||||
"--output_dir=" + updatedParams.outputGCSPath,
|
||||
"--use_tpu",
|
||||
"--master=" + updatedParams.tpuEndpoint,
|
||||
],
|
||||
command: [
|
||||
"t2t-trainer",
|
||||
],
|
||||
image: updatedParams.cpuImage,
|
||||
name: "tensorflow",
|
||||
resources: {
|
||||
"limits": {
|
||||
"cloud-tpus.google.com/v2": updatedParams.tpus,
|
||||
},
|
||||
requests: {
|
||||
memory: "1Gi",
|
||||
},
|
||||
},
|
||||
},
|
||||
],
|
||||
restartPolicy: "OnFailure",
|
||||
}, // spec
|
||||
}, // template
|
||||
}, // Master
|
||||
}, // tfReplicaSpecs
|
||||
}, // Spec
|
||||
}; // tfJob
|
||||
|
||||
k.core.v1.list.new([
|
||||
tfjob,
|
||||
])
|
||||
|
|
@ -0,0 +1,7 @@
|
|||
local env = std.extVar("__ksonnet/environments");
|
||||
local params = std.extVar("__ksonnet/params").components.ui;
|
||||
local k = import "k.libsonnet";
|
||||
|
||||
local ui = import "ui.libsonnet";
|
||||
|
||||
std.prune(k.core.v1.list.new(ui.parts(params, env)))
|
|
@ -0,0 +1,102 @@
|
|||
{
|
||||
parts(params, env):: [
|
||||
{
|
||||
apiVersion: "v1",
|
||||
kind: "Service",
|
||||
metadata: {
|
||||
name: "kubeflow-demo-ui",
|
||||
namespace: env.namespace,
|
||||
annotations: {
|
||||
"getambassador.io/config":
|
||||
std.join("\n", [
|
||||
"---",
|
||||
"apiVersion: ambassador/v0",
|
||||
"kind: Mapping",
|
||||
"name: kubeflow_demo_ui",
|
||||
"prefix: /kubeflow_demo/",
|
||||
"rewrite: /",
|
||||
"service: kubeflow-demo-ui:80",
|
||||
]),
|
||||
},
|
||||
},
|
||||
spec: {
|
||||
ports: [
|
||||
{
|
||||
port: 80,
|
||||
targetPort: 80,
|
||||
},
|
||||
],
|
||||
selector: {
|
||||
app: "kubeflow-demo-ui",
|
||||
},
|
||||
type: "ClusterIP",
|
||||
},
|
||||
},
|
||||
{
|
||||
apiVersion: "apps/v1beta1",
|
||||
kind: "Deployment",
|
||||
metadata: {
|
||||
name: "kubeflow-demo-ui",
|
||||
namespace: env.namespace,
|
||||
},
|
||||
spec: {
|
||||
replicas: 1,
|
||||
template: {
|
||||
metadata: {
|
||||
labels: {
|
||||
app: "kubeflow-demo-ui",
|
||||
},
|
||||
},
|
||||
spec: {
|
||||
containers: [
|
||||
{
|
||||
args: [
|
||||
"app.py",
|
||||
"--model_url",
|
||||
"http://serving:8000/model/serving:predict",
|
||||
"--data_dir",
|
||||
"gs://kubeflow-demo-base/featurization/yelp-data-1000000",
|
||||
],
|
||||
command: [
|
||||
"python",
|
||||
],
|
||||
image: params.image,
|
||||
name: "kubeflow-demo-ui",
|
||||
ports: [
|
||||
{
|
||||
containerPort: 80,
|
||||
},
|
||||
],
|
||||
"env": [
|
||||
{
|
||||
name: "GOOGLE_APPLICATION_CREDENTIALS",
|
||||
value: "/secret/gcp-credentials/key.json"
|
||||
},
|
||||
],
|
||||
"volumeMounts": [
|
||||
{
|
||||
mountPath: "/secret/gcp-credentials",
|
||||
name: "gcp-credentials",
|
||||
},
|
||||
],
|
||||
},
|
||||
],
|
||||
"imagePullSecrets": [
|
||||
{
|
||||
name: "gcp-registry-credentials",
|
||||
},
|
||||
],
|
||||
"volumes": [
|
||||
{
|
||||
name: "gcp-credentials",
|
||||
secret: {
|
||||
secretName: "gcp-credentials",
|
||||
},
|
||||
},
|
||||
],
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
],
|
||||
}
|
Loading…
Reference in New Issue