mirror of https://github.com/kubeflow/examples.git
Add tensorboard and check in vendor for the code search example. (#255)
* Add tensorboard and check in vendor for the code search example. * * Remove the default env; when I ran ks show I got errors but removing it and adding a fresh env worked. It also won't point to the correct cluster for users.
This commit is contained in:
parent
2064b43def
commit
adf614fc5f
|
@ -1,13 +1,10 @@
|
|||
apiVersion: 0.2.0
|
||||
environments:
|
||||
default:
|
||||
destination:
|
||||
namespace: kubeflow
|
||||
server: https://35.237.202.148
|
||||
k8sVersion: v1.9.7
|
||||
path: default
|
||||
kind: ksonnet.io/app
|
||||
libraries:
|
||||
examples:
|
||||
name: examples
|
||||
registry: kubeflow
|
||||
version: defc235463799d5600001ee0ed6ef68f7af24a17
|
||||
tf-serving:
|
||||
name: tf-serving
|
||||
registry: kubeflow
|
||||
|
|
|
@ -79,5 +79,10 @@
|
|||
indexFile: $.components['t2t-code-search'].workingDir + '/code_search_index.nmslib',
|
||||
servingUrl: 'http://t2t-code-search.kubeflow:9001/v1/models/t2t-code-search:predict',
|
||||
},
|
||||
tensorboard: {
|
||||
image: "tensorflow/tensorflow:1.8.0",
|
||||
logDir: "gs://example/to/model/logdir",
|
||||
name: "tensorboard",
|
||||
},
|
||||
},
|
||||
}
|
||||
|
|
|
@ -3,5 +3,4 @@ local t2tJob = import "t2t-job.libsonnet";
|
|||
|
||||
local env = std.extVar("__ksonnet/environments");
|
||||
local params = std.extVar("__ksonnet/params").components["t2t-code-search-trainer"];
|
||||
|
||||
std.prune(k.core.v1.list.new([t2tJob.parts(params, env).job]))
|
||||
|
|
|
@ -21,35 +21,35 @@ local baseParams = std.extVar("__ksonnet/params").components["t2t-job"];
|
|||
],
|
||||
|
||||
// getTrainerCmd builds the container command lines for a t2t-trainer run.
//
// params must provide: problem, model, hparams_set, dataDir, outputDir,
// train_steps, eval_steps, numPsGpu, numWorkerGpu, numWorker and numPs.
//
// Returns an object with three argument lists:
//   worker      - the base trainer command,
//   worker_dist - the base command plus the distributed-training flags,
//   ps          - the base command plus the parameter-server flags.
//
// Every local and field is declared exactly once: Jsonnet rejects an object
// that defines the same field name twice.
getTrainerCmd(params):: {
  local trainer = [
    "/usr/local/sbin/t2t-entrypoint",
    "t2t-trainer",
    "--problem=" + params.problem,
    "--model=" + params.model,
    "--hparams_set=" + params.hparams_set,
    "--data_dir=" + params.dataDir,
    "--output_dir=" + params.outputDir,
    "--train_steps=" + std.toString(params.train_steps),
    "--eval_steps=" + std.toString(params.eval_steps),
    "--t2t_usr_dir=/app/code_search/t2t",
  ],

  worker: trainer,

  worker_dist: trainer + [
    "--schedule=train",
    "--ps_gpu=" + std.toString(params.numPsGpu),
    "--worker_gpu=" + std.toString(params.numWorkerGpu),
    "--worker_replicas=" + std.toString(params.numWorker),
    "--ps_replicas=" + std.toString(params.numPs),
    // --eval_steps is already part of `trainer`; it is repeated here unchanged.
    "--eval_steps=" + std.toString(params.eval_steps),
    "--worker_job=/job:worker",
  ],

  ps: trainer + [
    "--schedule=run_std_server",
    "--ps_job=/job:ps",
  ],
},
|
||||
|
||||
tfJobReplica(replicaType, number, args, image, numGpus=0, imagePullSecrets=[], env=[], volumes=[], volumeMounts=[])::
|
||||
|
@ -69,7 +69,7 @@ local baseParams = std.extVar("__ksonnet/params").components["t2t-job"];
|
|||
replicas: number,
|
||||
template: {
|
||||
spec: {
|
||||
containers: [ containerSpec ],
|
||||
containers: [containerSpec],
|
||||
[if std.length(imagePullSecrets) > 0 then "imagePullSecrets"]: imagePullSecrets,
|
||||
[if std.length(volumes) > 0 then "volumes"]: volumes,
|
||||
// restartPolicy: "OnFailure",
|
||||
|
@ -84,7 +84,7 @@ local baseParams = std.extVar("__ksonnet/params").components["t2t-job"];
|
|||
local workerEnv = [
|
||||
{
|
||||
name: "GOOGLE_APPLICATION_CREDENTIALS",
|
||||
value: "/secret/gcp-credentials/user-gcp-sa.json"
|
||||
value: "/secret/gcp-credentials/user-gcp-sa.json",
|
||||
},
|
||||
],
|
||||
local workerVolumes = [
|
||||
|
@ -104,8 +104,8 @@ local baseParams = std.extVar("__ksonnet/params").components["t2t-job"];
|
|||
|
||||
local cmd = $.getTrainerCmd(params),
|
||||
local workerCmd = if params.jobType == "exporter" then $.getExporterCmd(params)
|
||||
else if params.jobType == "datagen" then $.getDatagenCmd(params)
|
||||
else cmd.worker,
|
||||
else if params.jobType == "datagen" then $.getDatagenCmd(params)
|
||||
else cmd.worker,
|
||||
|
||||
job:: {
|
||||
apiVersion: "kubeflow.org/v1alpha2",
|
||||
|
@ -116,14 +116,19 @@ local baseParams = std.extVar("__ksonnet/params").components["t2t-job"];
|
|||
},
|
||||
spec: {
|
||||
tfReplicaSpecs: {
|
||||
[if params.numPs > 0 then "PS"]: $.tfJobReplica("PS", params.numPs, cmd.ps, workerImage,
|
||||
[if params.numPs > 0 then "PS"]: $.tfJobReplica("PS",
|
||||
params.numPs,
|
||||
cmd.ps,
|
||||
workerImage,
|
||||
numGpus=params.numPsGpu,
|
||||
env=workerEnv,
|
||||
volumes=workerVolumes,
|
||||
volumeMounts=workerVolumeMounts),
|
||||
[if params.numWorker > 0 then "Worker"]: $.tfJobReplica("WORKER", params.numWorker,
|
||||
workerCmd, workerImage,
|
||||
numGpus=params.numPsGpu,
|
||||
[if params.numWorker > 0 then "Worker"]: $.tfJobReplica("WORKER",
|
||||
params.numWorker,
|
||||
workerCmd,
|
||||
workerImage,
|
||||
numGpus=params.numWorkerGpu,
|
||||
env=workerEnv,
|
||||
volumes=workerVolumes,
|
||||
volumeMounts=workerVolumeMounts),
|
||||
|
|
|
@ -0,0 +1,103 @@
|
|||
local env = std.extVar("__ksonnet/environments");
|
||||
local params = std.extVar("__ksonnet/params").components.tensorboard;
|
||||
|
||||
local k = import "k.libsonnet";
|
||||
|
||||
local name = params.name;
|
||||
local namespace = env.namespace;
|
||||
// Service for TensorBoard: listens on port 80, selects pods labelled
// app=tensorboard / tb-job=<name>, and carries an Ambassador Mapping
// annotation for the /tensorboard/<name>/ prefix.
local service = {
  // Lines of the Ambassador Mapping document; joined with "\n" below.
  local ambassadorMapping = [
    "---",
    "apiVersion: ambassador/v0",
    "kind: Mapping",
    "name: " + name + "_mapping",
    "prefix: /tensorboard/" + name + "/",
    "rewrite: /",
    "service: " + name + "-tb." + namespace,
  ],

  kind: "Service",
  apiVersion: "v1",
  metadata: {
    annotations: {
      "getambassador.io/config": std.join("\n", ambassadorMapping),
    },  //annotations
    name: name + "-tb",
    namespace: env.namespace,
  },
  spec: {
    selector: { app: "tensorboard", "tb-job": name },
    ports: [
      { name: "http", port: 80, targetPort: 80 },
    ],
  },
};
|
||||
|
||||
// Single-replica Deployment that runs TensorBoard on port 80 against
// params.logDir, with the "user-gcp-sa" secret mounted and referenced through
// GOOGLE_APPLICATION_CREDENTIALS.
local deployment = {
  // Name shared by the secret volume and its mount.
  local credentialsVolume = "gcp-credentials",

  // The one container of the pod.
  local tensorboardContainer = {
    name: "tensorboard",
    image: params.image,
    command: [
      "/usr/local/bin/tensorboard",
      "--logdir=" + params.logDir,
      "--port=80",
    ],
    ports: [{ containerPort: 80 }],
    env: [
      {
        name: "GOOGLE_APPLICATION_CREDENTIALS",
        value: "/secret/gcp-credentials/user-gcp-sa.json",
      },
    ],
    volumeMounts: [
      { mountPath: "/secret/gcp-credentials", name: credentialsVolume },
    ],
  },

  kind: "Deployment",
  apiVersion: "apps/v1beta1",
  metadata: {
    name: name + "-tb",
    namespace: env.namespace,
  },
  spec: {
    replicas: 1,
    template: {
      metadata: {
        name: name,
        namespace: namespace,
        labels: { app: "tensorboard", "tb-job": name },
      },
      spec: {
        containers: [tensorboardContainer],
        volumes: [
          { name: credentialsVolume, secret: { secretName: "user-gcp-sa" } },
        ],
      },
    },
  },
};
|
||||
|
||||
std.prune(k.core.v1.list.new([service, deployment]))
|
|
@ -0,0 +1,22 @@
|
|||
{
|
||||
"name": "kubeflow examples",
|
||||
"apiVersion": "0.0.1",
|
||||
"kind": "ksonnet.io/parts",
|
||||
"description": "kubeflow examples.\n",
|
||||
"author": "kubeflow-team <kubeflow-discuss@googlegroups.com>",
|
||||
"contributors": [
|
||||
],
|
||||
"repository": {
|
||||
"type": "git",
|
||||
"url": "https://github.com/kubeflow/kubeflow"
|
||||
},
|
||||
"bugs": {
|
||||
"url": "https://github.com/kubeflow/kubeflow/issues"
|
||||
},
|
||||
"keywords": [
|
||||
"kubernetes",
|
||||
"kubeflow",
|
||||
"machine learning"
|
||||
],
|
||||
"license": "Apache 2.0",
|
||||
}
|
|
@ -0,0 +1,87 @@
|
|||
// @apiVersion 0.1
|
||||
// @name io.ksonnet.pkg.tensorboard
|
||||
// @description Prototype for Tensorboard deployments
|
||||
// @shortDescription Prototype for Tensorboard deployments
|
||||
// @param name string Name to give to the tensorboard deployment
|
||||
// @param logDir string The path containing your TF events files.
|
||||
// @optionalParam image string tensorflow/tensorflow:1.8.0 The Docker image to use.
|
||||
|
||||
local k = import "k.libsonnet";
|
||||
|
||||
local name = params.name;
|
||||
local namespace = env.namespace;
|
||||
// Service for TensorBoard: listens on port 80, selects pods labelled
// app=tensorboard / tb-job=<name>, and carries an Ambassador Mapping
// annotation for the /tensorboard/<name>/ prefix.
local service = {
  // Lines of the Ambassador Mapping document; joined with "\n" below.
  local mappingLines = [
    "---",
    "apiVersion: ambassador/v0",
    "kind: Mapping",
    "name: " + name + "_mapping",
    "prefix: /tensorboard/" + name + "/",
    "rewrite: /",
    "service: " + name + "-tb." + namespace,
  ],

  kind: "Service",
  apiVersion: "v1",
  metadata: {
    annotations: {
      "getambassador.io/config": std.join("\n", mappingLines),
    },  //annotations
    name: name + "-tb",
    namespace: env.namespace,
  },
  spec: {
    selector: { app: "tensorboard", "tb-job": name },
    ports: [
      { name: "http", port: 80, targetPort: 80 },
    ],
  },
};
|
||||
|
||||
// Single-replica Deployment that runs TensorBoard on port 80 against
// params.logDir using the image given in params.image.
local deployment = {
  // The one container of the pod.
  local tensorboardContainer = {
    name: "tensorboard",
    image: params.image,
    command: [
      "/usr/local/bin/tensorboard",
      "--logdir=" + params.logDir,
      "--port=80",
    ],
    ports: [{ containerPort: 80 }],
  },

  kind: "Deployment",
  apiVersion: "apps/v1beta1",
  metadata: {
    name: name + "-tb",
    namespace: env.namespace,
  },
  spec: {
    replicas: 1,
    template: {
      metadata: {
        name: name,
        namespace: namespace,
        labels: { app: "tensorboard", "tb-job": name },
      },
      spec: {
        containers: [tensorboardContainer],
      },
    },
  },
};
|
||||
|
||||
std.prune(k.core.v1.list.new([service, deployment]))
|
|
@ -0,0 +1,143 @@
|
|||
// @apiVersion 0.1
|
||||
// @name io.ksonnet.pkg.tf-batch-predict
|
||||
// @description TensorFlow batch-predict
|
||||
// @shortDescription A TensorFlow batch-predict job
|
||||
// @param name string Name to give to each of the components
|
||||
// @optionalParam numGpus number 0 number of GPUs to use
|
||||
// @param modelPath string Path to the model directory
|
||||
// @param inputFilePatterns string Input file patterns
|
||||
// @param outputResultPrefix string Output result file prefix
|
||||
// @param outputErrorPrefix string Output error file prefix
|
||||
// @optionalParam batchSize number 8 Batch size
|
||||
// @optionalParam gcpCredentialSecretName string Secret name if used in GCP
|
||||
|
||||
local k = import "k.libsonnet";
|
||||
|
||||
// Base object for the TensorFlow batch-predict job.
//
//   params:: late-bound parameters; callers override them with `params+:`.
//   parts::  the Kubernetes objects built from those parameters. Currently a
//            single batch/v1 Job, `bpJob`.
local tfBatchPredictBase = {
  local base = self,

  // Parameters are intended to be late bound.
  params:: {
    name: null,
    labels: {
      app: base.params.name,
    },
    modelName: self.name,
    modelPath: null,
    inputFilePatterns: null,
    inputFileFormat: null,
    outputResultPrefix: null,
    outputErrorPrefix: null,
    batchSize: 8,
    numGpus: 0,
    gcpCredentialSecretName: "",

    version: "v1",

    // If users want to override the image they can override defaultCpuImage and/or defaultGpuImage,
    // in which case the image used will still depend on whether GPUs are used or not.
    // Users can also override predictImage, in which case the user-supplied value will always be used
    // regardless of numGpus.
    defaultCpuImage: "gcr.io/kubeflow-examples/batch-predict:tf18",
    defaultGpuImage: "gcr.io/kubeflow-examples/batch-predict:tf18-gpu",

    predictImage: if self.numGpus == 0 then
      self.defaultCpuImage
    else
      self.defaultGpuImage,
  },

  parts:: {
    bpJob: {
      apiVersion: "batch/v1",
      kind: "Job",
      metadata: {
        name: base.params.name + "-" + base.params.version,
        namespace: base.params.namespace,
        labels: base.params.labels,
      },
      spec: {
        // backoffLimit is a field of the Job spec, not of the pod template;
        // it has to live here to limit the number of retries.
        backoffLimit: 1,
        template: {
          metadata: {
            labels: base.params.labels,
          },
          spec: {
            containers: [
              {
                name: base.params.name,
                image: base.params.predictImage,
                imagePullPolicy: "IfNotPresent",
                args: [
                  "--model_dir=" + base.params.modelPath,
                  "--input_file_patterns=" + base.params.inputFilePatterns,
                  "--input_file_format=" + base.params.inputFileFormat,
                  "--output_result_prefix=" + base.params.outputResultPrefix,
                  "--output_error_prefix=" + base.params.outputErrorPrefix,
                  "--batch_size=" + base.params.batchSize,
                ],

                // Point the container at the mounted key only when a
                // credential secret has been configured.
                env:
                  if base.params.gcpCredentialSecretName != "" then
                    [{
                      name: "GOOGLE_APPLICATION_CREDENTIALS",
                      value: "/secret/gcp-credentials/key.json",
                    }]
                  else [],

                resources: {
                  limits: {
                    // The GPU limit is only emitted when GPUs are requested.
                    [if base.params.numGpus > 0 then "nvidia.com/gpu"]: base.params.numGpus,
                  },
                },

                volumeMounts+: if base.params.gcpCredentialSecretName != "" then [
                  {
                    name: "gcp-credentials",
                    readOnly: true,
                    mountPath: "/secret/gcp-credentials",
                  },
                ],
              },  // container
            ],  // containers

            restartPolicy: "Never",
            activeDeadlineSeconds: 3000,
            // See: https://github.com/kubeflow/kubeflow/tree/master/components/k8s-model-server#set-the-user-optional
            // This user and group should be defined in the Docker image.
            // Per best practices we don't run as the root user.
            securityContext: {
              runAsUser: 1000,
              fsGroup: 1000,
            },
            volumes:
              if base.params.gcpCredentialSecretName != "" then [
                {
                  name: "gcp-credentials",
                  secret: {
                    secretName: base.params.gcpCredentialSecretName,
                  },
                },
              ] else [],
          },  // template spec
        },  // template
      },  // overall spec
    },  // bpJob
  },  // parts
};
|
||||
|
||||
|
||||
// ksonnet appears to require name be a parameter of the prototype which is why we handle it differently.
|
||||
local name = params.name;
|
||||
|
||||
local updatedParams = env + params;
|
||||
|
||||
// Concrete batch-predict object: the base with its late-bound params
// overridden by the user-supplied values, and `name` set explicitly.
local tfBatchPredict = tfBatchPredictBase + {
  params+: updatedParams + { name: name },
};
|
||||
|
||||
|
||||
std.prune(k.core.v1.list.new([tfBatchPredict.parts.bpJob]))
|
|
@ -0,0 +1,91 @@
|
|||
// @apiVersion 0.1
|
||||
// @name io.ksonnet.pkg.tf-job-simple-v1alpha1
|
||||
// @description tf-job-simple
|
||||
// @shortDescription A simple TFJob to run CNN benchmark
|
||||
// @param name string Name to give to each of the components
|
||||
|
||||
local k = import "k.libsonnet";
|
||||
|
||||
local name = import "param://name";
|
||||
local namespace = "default";
|
||||
local image = "gcr.io/kubeflow/tf-benchmarks-cpu:v20171202-bdab599-dirty-284af3";
|
||||
|
||||
// TFJob (kubeflow.org/v1alpha1) that runs the tf_cnn_benchmarks script with
// one WORKER replica and one PS replica; WORKER index 0 is the chief.
local tfjob = {
  // Arguments passed to the benchmark container in both replica types.
  local benchmarkArgs = [
    "python",
    "tf_cnn_benchmarks.py",
    "--batch_size=32",
    "--model=resnet50",
    "--variable_update=parameter_server",
    "--flush_stdout=true",
    "--num_gpus=1",
    "--local_parameter_device=cpu",
    "--device=cpu",
    "--data_format=NHWC",
  ],

  // Builds a one-replica spec of the given type; the two replica specs
  // differ only in tfReplicaType.
  local replicaSpec(replicaType) = {
    replicas: 1,
    template: {
      spec: {
        containers: [
          {
            args: benchmarkArgs,
            image: image,
            name: "tensorflow",
            workingDir: "/opt/tf-benchmarks/scripts/tf_cnn_benchmarks",
          },
        ],
        restartPolicy: "OnFailure",
      },
    },
    tfReplicaType: replicaType,
  },

  kind: "TFJob",
  apiVersion: "kubeflow.org/v1alpha1",
  metadata: { name: name, namespace: namespace },
  spec: {
    replicaSpecs: [replicaSpec("WORKER"), replicaSpec("PS")],
    terminationPolicy: {
      chief: { replicaIndex: 0, replicaName: "WORKER" },
    },
    tfimage: image,
  },
};
|
||||
|
||||
k.core.v1.list.new([
|
||||
tfjob,
|
||||
])
|
|
@ -0,0 +1,82 @@
|
|||
// @apiVersion 0.1
|
||||
// @name io.ksonnet.pkg.tf-job-simple
|
||||
// @description tf-job-simple
|
||||
// @shortDescription A simple TFJob to run CNN benchmark
|
||||
// @param name string Name for the job.
|
||||
|
||||
local k = import "k.libsonnet";
|
||||
|
||||
local name = params.name;
|
||||
local namespace = env.namespace;
|
||||
local image = "gcr.io/kubeflow/tf-benchmarks-cpu:v20171202-bdab599-dirty-284af3";
|
||||
|
||||
// TFJob (kubeflow.org/v1alpha2) that runs the tf_cnn_benchmarks script in a
// Worker replica and a Ps replica built from the same pod template.
local tfjob = {
  // Pod template shared by the Worker and Ps replica specs.
  local benchmarkPod = {
    spec: {
      containers: [
        {
          args: [
            "python",
            "tf_cnn_benchmarks.py",
            "--batch_size=32",
            "--model=resnet50",
            "--variable_update=parameter_server",
            "--flush_stdout=true",
            "--num_gpus=1",
            "--local_parameter_device=cpu",
            "--device=cpu",
            "--data_format=NHWC",
          ],
          image: image,
          name: "tensorflow",
          workingDir: "/opt/tf-benchmarks/scripts/tf_cnn_benchmarks",
        },
      ],
      restartPolicy: "OnFailure",
    },
  },

  kind: "TFJob",
  apiVersion: "kubeflow.org/v1alpha2",
  metadata: { name: name, namespace: namespace },
  spec: {
    tfReplicaSpecs: {
      Worker: {
        replicas: 1,
        template: benchmarkPod,
      },
      // NOTE(review): unlike Worker, Ps sets no `replicas` and carries a
      // `tfReplicaType` field, which looks like a leftover from the v1alpha1
      // replicaSpecs layout -- confirm against the v1alpha2 TFJob API.
      Ps: {
        template: benchmarkPod,
        tfReplicaType: "PS",
      },
    },
  },
};
|
||||
|
||||
k.core.v1.list.new([
|
||||
tfjob,
|
||||
])
|
|
@ -0,0 +1,94 @@
|
|||
// @apiVersion 0.1
|
||||
// @name io.ksonnet.pkg.tf-serving-simple
|
||||
// @description tf-serving-simple
|
||||
// @shortDescription tf-serving-simple
|
||||
// @param name string Name to give to each of the components
|
||||
|
||||
local k = import "k.libsonnet";
|
||||
|
||||
local namespace = "default";
|
||||
local appName = import "param://name";
|
||||
local modelBasePath = "gs://kubeflow-models/inception";
|
||||
local modelName = "inception";
|
||||
local image = "gcr.io/kubeflow-images-public/tf-model-server-cpu:v20180327-995786ec";
|
||||
|
||||
// ClusterIP Service exposing the model server's gRPC port (9000) for the
// pods labelled app=<appName>.
local service = {
  local appLabels = { app: appName },

  kind: "Service",
  apiVersion: "v1",
  metadata: {
    name: appName,
    namespace: namespace,
    labels: appLabels,
  },
  spec: {
    type: "ClusterIP",
    selector: appLabels,
    ports: [
      { name: "grpc-tf-serving", port: 9000, targetPort: 9000 },
    ],
  },
};
|
||||
|
||||
// Deployment running tensorflow_model_server on port 9000 for the model
// named by modelName, loaded from modelBasePath.
local deployment = {
  local appLabels = { app: appName },

  // The model-server container.
  local modelServer = {
    name: "inception",
    image: image,
    imagePullPolicy: "IfNotPresent",
    args: [
      "/usr/bin/tensorflow_model_server",
      "--port=9000",
      "--model_name=" + modelName,
      "--model_base_path=" + modelBasePath,
    ],
    ports: [{ containerPort: 9000 }],
    resources: {
      requests: { cpu: "1", memory: "1Gi" },
      limits: { cpu: "4", memory: "4Gi" },
    },
  },

  kind: "Deployment",
  apiVersion: "extensions/v1beta1",
  metadata: {
    name: appName,
    namespace: namespace,
    labels: appLabels,
  },
  spec: {
    template: {
      metadata: { labels: appLabels },
      spec: { containers: [modelServer] },
    },
  },
};
|
||||
|
||||
k.core.v1.list.new([
|
||||
service,
|
||||
deployment,
|
||||
])
|
|
@ -0,0 +1,179 @@
|
|||
// @apiVersion 0.1
|
||||
// @name io.ksonnet.pkg.tf-serving-with-istio
|
||||
// @description tf-serving-with-istio
|
||||
// @shortDescription tf-serving-with-istio
|
||||
// @param name string Name to give to each of the components
|
||||
|
||||
local k = import "k.libsonnet";
|
||||
|
||||
local namespace = "default";
|
||||
local appName = import "param://name";
|
||||
local modelBasePath = "gs://kubeflow-models/inception";
|
||||
local modelName = "inception";
|
||||
local image = "gcr.io/kubeflow-images-public/tf-model-server-cpu:v20180327-995786ec";
|
||||
local httpProxyImage = "gcr.io/kubeflow-images-public/tf-model-server-http-proxy:v20180327-995786ec";
|
||||
|
||||
// Istio RouteRule (config.istio.io/v1alpha2) with precedence 0 whose
// destination is "tf-serving" and whose single route carries the label
// version=v1.
local routeRule = {
  kind: "RouteRule",
  apiVersion: "config.istio.io/v1alpha2",
  metadata: { name: appName, namespace: namespace },
  spec: {
    precedence: 0,
    destination: { name: "tf-serving" },
    route: [
      { labels: { version: "v1" } },
    ],
  },
};
|
||||
|
||||
// ClusterIP Service exposing the gRPC port (9000) and the HTTP proxy port
// (8000) of the pods labelled app=<appName>. The annotation holds two
// Ambassador Mappings for the /models/tf-serving/ prefix: one for GET and one
// for POST.
local service = {
  local appLabels = { app: appName },

  // Lines of the two Ambassador Mapping documents; joined with "\n" below.
  local ambassadorMappings = [
    "---",
    "apiVersion: ambassador/v0",
    "kind: Mapping",
    "name: tfserving-mapping-tf-serving-get",
    "prefix: /models/tf-serving/",
    "rewrite: /",
    "method: GET",
    "service: tf-serving." + namespace + ":8000",
    "---",
    "apiVersion: ambassador/v0",
    "kind: Mapping",
    "name: tfserving-mapping-tf-serving-post",
    "prefix: /models/tf-serving/",
    "rewrite: /model/tf-serving:predict",
    "method: POST",
    "service: tf-serving." + namespace + ":8000",
  ],

  kind: "Service",
  apiVersion: "v1",
  metadata: {
    name: appName,
    namespace: namespace,
    labels: appLabels,
    annotations: {
      "getambassador.io/config": std.join("\n", ambassadorMappings),
    },
  },
  spec: {
    type: "ClusterIP",
    selector: appLabels,
    ports: [
      { name: "grpc-tf-serving", port: 9000, targetPort: 9000 },
      { name: "http-tf-serving-proxy", port: 8000, targetPort: 8000 },
    ],
  },
};
|
||||
|
||||
// Deployment with two containers per pod: the TensorFlow model server
// (gRPC, port 9000) and an HTTP proxy (port 8000) started with
// --rpc_port=9000. The pod template requests Istio sidecar injection.
local deployment = {
  apiVersion: "extensions/v1beta1",
  kind: "Deployment",
  metadata: {
    labels: {
      app: appName,
    },
    name: appName,
    namespace: namespace,
  },
  spec: {
    template: {
      metadata: {
        labels: {
          app: appName,
        },
        annotations: {
          "sidecar.istio.io/inject": "true",
        },
      },
      spec: {
        containers: [
          // Model server.
          {
            args: [
              "/usr/bin/tensorflow_model_server",
              "--port=9000",
              "--model_name=" + modelName,
              "--model_base_path=" + modelBasePath,
            ],
            image: image,
            imagePullPolicy: "IfNotPresent",
            name: "inception",
            ports: [
              {
                containerPort: 9000,
              },
            ],
            resources: {
              limits: {
                cpu: "4",
                memory: "4Gi",
              },
              requests: {
                cpu: "1",
                memory: "1Gi",
              },
            },
          },
          // HTTP proxy in front of the model server.
          {
            name: appName + "-http-proxy",
            image: httpProxyImage,
            imagePullPolicy: "IfNotPresent",
            command: [
              "python",
              "/usr/src/app/server.py",
              "--port=8000",
              "--rpc_port=9000",
              "--rpc_timeout=10.0",
            ],
            env: [],
            ports: [
              {
                containerPort: 8000,
              },
            ],
            resources: {
              requests: {
                memory: "1Gi",
                cpu: "1",
              },
              limits: {
                memory: "4Gi",
                cpu: "4",
              },
            },
            // Container-level securityContext has no fsGroup field (fsGroup
            // exists only on the pod-level securityContext), so only
            // runAsUser is set here.
            securityContext: {
              runAsUser: 1000,
            },
          },
        ],
      },
    },
  },
};
|
||||
|
||||
k.core.v1.list.new([
|
||||
routeRule,
|
||||
service,
|
||||
deployment,
|
||||
])
|
Loading…
Reference in New Issue