mirror of https://github.com/kubeflow/examples.git
Upgrade and fix the serving components. (#348)
* Upgrade and fix the serving components.
* Install a new version of the TFServing package so we can use the new template.
* Fix the UI image. Use the same requirements file as for Dataflow so we are consistent w.r.t. the versions of TF and Tensor2Tensor.
* Remove nms.libsonnet; move all the manifests into the actual component files rather than using a shared library.
* Fix the name of the TFServing service and deployment; it needs to match the name used by the front-end server.
* Change the port of TFServing; we now use the built-in HTTP server in TFServing, which listens on port 8500, as opposed to our custom HTTP proxy (a request sketch follows the commit metadata below).
* We encountered an error importing nmslib; moving the import to the top of the file appears to fix this.
* Fix lint.
This commit is contained in:
parent a630fcea34
commit de17011066
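For reference, a minimal sketch of the kind of request the front end now sends to TFServing's built-in REST endpoint on port 8500 (the URL and payload shape are taken from the diff below; the plain UTF-8 base64 encoding is an assumption standing in for the component's T2T query encoder):

```python
import base64
import json

import requests

# In-cluster URL from the params in this commit; port 8500 is TFServing's
# built-in HTTP server, which replaces the custom HTTP proxy on 9001.
SERVING_URL = "http://t2t-code-search.kubeflow:8500/v1/models/t2t-code-search:predict"

def predict(query_str):
  # embed_query() in this commit sends {"instances": [{"input": {"b64": ...}}]};
  # plain UTF-8 base64 here is a placeholder for the real T2T encoder output.
  encoded = base64.b64encode(query_str.encode("utf-8")).decode("utf-8")
  data = {"instances": [{"input": {"b64": encoded}}]}
  response = requests.post(url=SERVING_URL,
                           headers={"content-type": "application/json"},
                           data=json.dumps(data))
  response.raise_for_status()
  return response.json()
```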
@@ -61,6 +61,16 @@ build-gcb:
	gcloud builds submit --machine-type=n1-highcpu-32 --project=kubeflow-ci --config=./build/build.json \
		--timeout=3600 ./build

build-ui-gcb:
	mkdir -p build
	jsonnet ./docker/ui/build.jsonnet --ext-str gitVersion=$(GIT_VERSION) --ext-str tag=$(TAG) \
		> ./build/build.ui.json
	cp -r ./docker ./build/
	cp -r ./src ./build/
	rm -rf ./build/src/code_search/dataflow/cli/test_data
	rm -rf ./build/src/code_search/t2t/test_data
	gcloud builds submit --machine-type=n1-highcpu-32 --project=kubeflow-ci --config=./build/build.ui.json \
		--timeout=3600 ./build

# Build but don't attach the latest tag. This allows manual testing/inspection of the image
# first.
@@ -13,6 +13,21 @@ We are using the following project
* **project**: code-search-demo
* **[code-search-team@kubeflow.org](https://github.com/kubeflow/internal-acls/blob/master/code-search-team.members.txt)** Google group administering access

# Deploying the services

1. Deploy the TFServing server

   ```
   ks12 apply cs_demo -c t2t-code-search-serving
   ```

1. Deploy the UI and nmslib index server; a quick smoke test is sketched after these steps

   ```
   ks12 apply cs_demo -c search-index-server
   ```
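Once both components are up, a minimal smoke test against the index server might look like the following sketch (the service name, port, and the `q` parameter come from the search-index-server manifests in this commit; the `kubeflow` namespace and the example query are assumptions):

```python
import requests

# Service defined in search-index-server.jsonnet: port 8008, name
# "search-index-server"; the namespace is assumed to be "kubeflow" to match
# the servingUrl used elsewhere in this commit.
SEARCH_URL = "http://search-index-server.kubeflow:8008/query"

# The Flask route in search_server.py reads the query from the "q" parameter
# and returns HTTP 400 on an empty query.
response = requests.get(SEARCH_URL, params={"q": "read a csv file"})
response.raise_for_status()
print(response.json())
```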
# Results

## 2018-11-05
@@ -41,12 +56,3 @@ jlewi@ ran experiments that produced the following results
| transformer_base_single_gpu | 1 GPU worker (K80) | ~3.22611 global step /sec |
| transformer_base | 1 chief with K80, 8 workers with 1 K80, sync training | ~0.0588723 global step /sec |
| transformer_base | 1 chief (no GPU), 8 workers (no GPU), sync training | ~0.707014 global step /sec |
@@ -1,17 +1,17 @@
ARG BASE_IMAGE_TAG=1.8.0
FROM python:2.7-jessie

FROM tensorflow/tensorflow:$BASE_IMAGE_TAG
RUN apt-get update && apt-get install -y curl &&\
    rm -rf /var/lib/apt/lists/*

RUN curl -sL https://deb.nodesource.com/setup_10.x | bash - &&\
    apt-get install -y nodejs &&\
    pip install Flask~=1.0.0 \
        nmslib~=1.7.0 \
        numpy~=1.14.0 \
        oauth2client~=4.1.0 \
        requests~=2.18.0 \
        tensor2tensor~=1.8.0 &&\
    apt-get install -y nodejs && \
    rm -rf /var/lib/apt/lists/*

COPY src/requirements.ui.txt /tmp/requirements.ui.txt
COPY src/requirements.nmslib.txt /tmp/requirements.nmslib.txt
RUN pip install -r /tmp/requirements.ui.txt
RUN pip install -r /tmp/requirements.nmslib.txt

ADD src/ /src

WORKDIR /src
@@ -0,0 +1,26 @@
// TODO(jlewi): We should tag the image latest and then
// use latest as a cache so that rebuilds are fast
// https://cloud.google.com/cloud-build/docs/speeding-up-builds#using_a_cached_docker_image
{

  "steps": [
    {
      "id": "build-ui",
      "name": "gcr.io/cloud-builders/docker",
      "args": ["build", "-t", "gcr.io/kubeflow-examples/code-search-ui:" + std.extVar("tag"),
               "--label=git-versions=" + std.extVar("gitVersion"),
               "--file=docker/ui/Dockerfile",
               "."],
    },
    {
      "id": "tag-ui",
      "name": "gcr.io/cloud-builders/docker",
      "args": ["tag", "gcr.io/kubeflow-examples/code-search-ui:" + std.extVar("tag"),
               "gcr.io/kubeflow-examples/code-search-ui:latest"],
      "waitFor": ["build-ui"],
    },
  ],
  "images": ["gcr.io/kubeflow-examples/code-search-ui:" + std.extVar("tag"),
             "gcr.io/kubeflow-examples/code-search-ui:latest",
  ],
}
@@ -11,11 +11,11 @@ libraries:
  examples:
    name: examples
    registry: kubeflow
    version: defc235463799d5600001ee0ed6ef68f7af24a17
    version: dddba26893846041b7cee25ee6121e02a04bd503
  tf-serving:
    name: tf-serving
    registry: kubeflow
    version: ab6084349673e6405ae486eb3be2141e3550643c
    version: dddba26893846041b7cee25ee6121e02a04bd503
name: kubeflow
registries:
  incubator:
@@ -23,5 +23,5 @@ registries:
    uri: github.com/ksonnet/parts/tree/master/incubator
  kubeflow:
    protocol: github
    uri: https://github.com/kubeflow/kubeflow/tree/master/kubeflow
    uri: github.com/kubeflow/kubeflow/tree/v0.3-branch/kubeflow
    version: 0.0.1
@@ -8,7 +8,11 @@
    eval_steps: 100,
    hparams_set: "transformer_base",
    project: "code-search-demo",
    modelDir: "gs://code-search-demo/models/20181107-dist-sync-gpu/export/1541712907/",
    modelDir: "gs://code-search-demo/models/20181107-dist-sync-gpu/export/1541712907/",

    // modelBasePath shouldn't have an integer in it.
    modelBasePath: "gs://code-search-demo/models/20181107-dist-sync-gpu/export/",

    problem: "kf_github_function_docstring",
    model: "kf_similarity_transformer",
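The comment above reflects how TFServing resolves models: `model_base_path` must be the version-free parent directory, under which the server picks the highest numeric subdirectory as the model version. A small sketch, with paths taken from the params above:

```python
import os

# modelDir (from the params above) points at one exported, timestamped version:
model_dir = "gs://code-search-demo/models/20181107-dist-sync-gpu/export/1541712907/"

# TFServing's model_base_path must be the version-free parent directory; the
# server watches it and serves the largest numeric subdirectory it finds.
model_base_path = os.path.dirname(model_dir.rstrip("/")) + "/"
assert model_base_path == "gs://code-search-demo/models/20181107-dist-sync-gpu/export/"
```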
@@ -1,167 +0,0 @@
local baseParams = std.extVar("__ksonnet/params").components["nmslib"];

{
  deploymentSpec(params, env, containers, volumes=[]):: {
    apiVersion: "extensions/v1beta1",
    kind: "Deployment",
    metadata: {
      name: params.name,
      namespace: env.namespace,
      labels: {
        app: params.name,
      }
    },
    spec: {
      replicas: params.replicas,
      selector: {
        matchLabels: {
          app: params.name,
        },
      },
      template: {
        metadata: {
          labels: {
            app: params.name,
          }
        },
        spec: {
          containers: containers,
          volumes: volumes,
        },
      },
    },
  },

  jobSpec(params, env, containers, volumes=[]):: {
    apiVersion: "batch/v1",
    kind: "Job",
    metadata: {
      name: params.name,
      namespace: env.namespace,
      labels: {
        app: params.name,
      }
    },
    spec: {
      replicas: params.replicas,
      template: {
        metadata: {
          labels: {
            app: params.name,
          }
        },
        spec: {
          "restartPolicy": "OnFailure",
          containers: containers,
          volumes: volumes,
        },
      },
    },
  },

  containerSpec(params, env=[], volumeMounts=[], ports=[]):: {
    name: params.name,
    image: params.image,
    command: params.command,
    ports: ports,
    env: env,
    volumeMounts: volumeMounts,
  },

  service(params, env):: {
    apiVersion: "v1",
    kind: "Service",
    metadata: {
      labels: {
        app: params.name,
      },
      name: params.name,
      namespace: env.namespace,
      annotations: {
        "getambassador.io/config":
          std.join("\n", [
            "---",
            "apiVersion: ambassador/v0",
            "kind: Mapping",
            "name: http-mapping-" + params.name,
            "prefix: /code-search/",
            "rewrite: /",
            "method: GET",
            "service: " + params.name + "." + env.namespace + ":8008",
          ]),
      },
    },
    spec: {
      type: "ClusterIP",
      selector: {
        app: params.name,
      },
      ports: [
        {
          name: "nmslib-serve-http",
          port: 8008,
          targetPort: 8008,
        },
      ],
    },
  },

  parts(newParams, env):: {
    local params = baseParams + newParams,

    local volumes = [
      {
        name: "gcp-credentials",
        secret: {
          secretName: "user-gcp-sa",
        },
      },
    ],

    local containerEnv = [
      {
        name: "GOOGLE_APPLICATION_CREDENTIALS",
        value: "/secret/gcp-credentials/user-gcp-sa.json",
      }
    ],

    local containerVolumeMounts = [
      {
        mountPath: "/secret/gcp-credentials",
        name: "gcp-credentials",
      },
    ],

    server:: {
      local serverParams = params + {
        command: [
          "python",
          "-m",
          "code_search.nmslib.cli.start_search_server",
          "--problem=" + params.problem,
          "--data_dir=" + params.dataDir,
          "--lookup_file=" + params.lookupFile,
          "--index_file=" + params.indexFile,
          "--serving_url=" + params.servingUrl,
        ],
      },

      local containerPorts = [
        {
          containerPort: 8008,
        }
      ],

      all: [
        $.service(serverParams, env),
        $.deploymentSpec(serverParams, env,
                         [
                           $.containerSpec(serverParams, env=containerEnv,
                                           volumeMounts=containerVolumeMounts,
                                           ports=containerPorts)
                         ],
                         volumes=volumes),
      ],
    }.all,
  }
}
@@ -21,7 +21,6 @@
    image: "gcr.io/kubeflow-examples/code-search:" + imageTag,
    imageGpu: "gcr.io/kubeflow-examples/code-search-gpu:" + imageTag,
    dataflowImage: "gcr.io/kubeflow-examples/code-search-dataflow:" + imageTag,

    imagePullSecrets: [],
    // TODO(jlewi): dataDir doesn't seem to be used.
    dataDir: "null",
@@ -63,12 +62,17 @@
      image: $.components["t2t-job"].image,
    },
    "t2t-code-search-serving": {
      name: "t2t-code-search",
      modelName: "t2t-code-search",
      modelPath: $.components["t2t-code-search"].workingDir + "/output/export/Servo",
      modelServerImage: "gcr.io/kubeflow-images-public/tensorflow-serving-1.8:latest",
      cloud: "gcp",
      name: "tf-serving",
      gcpCredentialSecretName: "user-gcp-sa",
      serviceType: "ClusterIP",
      deployHttpProxy: false,
      modelBasePath: "gs://some/model",
      // modelName is used by the client.
      modelName: "t2t-code-search",
      defaultCpuImage: "tensorflow/serving:1.11.1",
      defaultGpuImage: "tensorflow/serving:1.11.1-gpu",
      httpProxyImage: "gcr.io/kubeflow-images-public/tf-model-server-http-proxy:v20180723",
      numGpus: "0",
    },
    nmslib: {
      replicas: 1,
@@ -91,7 +95,10 @@
      dataDir: $.components["t2t-code-search"].workingDir + "/data",
      lookupFile: $.components["t2t-code-search"].workingDir + "/code_search_index.csv",
      indexFile: $.components["t2t-code-search"].workingDir + "/code_search_index.nmslib",
      servingUrl: "http://t2t-code-search.kubeflow:9001/v1/models/t2t-code-search:predict",
      servingUrl: "http://t2t-code-search.kubeflow:8500/v1/models/t2t-code-search:predict",
      // 1 replica is convenient for debugging but we should bump it up after debugging.
      replicas: 1,
      image: "gcr.io/kubeflow-examples/code-search-ui:v20181122-dc0e646-dirty-043a63",
    },
    "submit-preprocess-job": {
      name: "submit-preprocess-job",
@@ -1,5 +1,4 @@
local k = import "k.libsonnet";
local nms = import "nms.libsonnet";

local env = std.extVar("__ksonnet/environments");
local params = std.extVar("__ksonnet/params").components["search-index-creator"];
@@ -1,7 +1,122 @@
local k = import "k.libsonnet";
local nms = import "nms.libsonnet";

local env = std.extVar("__ksonnet/environments");
local params = std.extVar("__ksonnet/params").components["search-index-server"];
local baseParams = std.extVar("__ksonnet/params").components["search-index-server"];

std.prune(k.core.v1.list.new(nms.parts(params, env).server))
local experiments = import "experiments.libsonnet";

local experimentName = baseParams.experiment;
local experimentParams = experiments[experimentName];
local params = baseParams + experimentParams + {
  name: "search-index-server",
};

local deploymentSpec = {
  apiVersion: "extensions/v1beta1",
  kind: "Deployment",
  metadata: {
    name: params.name,
    namespace: env.namespace,
    labels: {
      app: params.name,
    },
  },
  spec: {
    replicas: params.replicas,
    selector: {
      matchLabels: {
        app: params.name,
      },
    },
    template: {
      metadata: {
        labels: {
          app: params.name,
        },
      },
      spec: {
        containers: [
          {
            name: params.name,
            image: params.image,
            command: [
              "python",
              "-m",
              "code_search.nmslib.cli.start_search_server",
              "--problem=" + params.problem,
              "--data_dir=" + params.dataDir,
              "--lookup_file=" + params.lookupFile,
              "--index_file=" + params.indexFile,
              "--serving_url=" + params.servingUrl,
            ],
            ports: [
              {
                containerPort: 8008,
              },
            ],
            env: [
              {
                name: "GOOGLE_APPLICATION_CREDENTIALS",
                value: "/secret/gcp-credentials/user-gcp-sa.json",
              },
            ],
            volumeMounts: [
              {
                mountPath: "/secret/gcp-credentials",
                name: "gcp-credentials",
              },
            ],
          },
        ],
        volumes: [
          {
            name: "gcp-credentials",
            secret: {
              secretName: "user-gcp-sa",
            },
          },
        ],
      },
    },
  },  // spec
};

local service = {
  apiVersion: "v1",
  kind: "Service",
  metadata: {
    labels: {
      app: params.name,
    },
    name: params.name,
    namespace: env.namespace,
    annotations: {
      "getambassador.io/config":
        std.join("\n", [
          "---",
          "apiVersion: ambassador/v0",
          "kind: Mapping",
          "name: http-mapping-" + params.name,
          "prefix: /code-search/",
          "rewrite: /",
          "method: GET",
          "service: " + params.name + "." + env.namespace + ":8008",
        ]),
    },
  },
  spec: {
    type: "ClusterIP",
    selector: {
      app: params.name,
    },
    ports: [
      {
        name: "nmslib-serve-http",
        port: 8008,
        targetPort: 8008,
      },
    ],
  },
};

std.prune(k.core.v1.list.new([deploymentSpec, service]))
@@ -1,21 +1,65 @@
local env = std.extVar("__ksonnet/environments");
local params = std.extVar("__ksonnet/params").components["t2t-code-search-serving"];

local baseParams = std.extVar("__ksonnet/params").components["t2t-code-search-serving"];

local experiments = import "experiments.libsonnet";

local k = import "k.libsonnet";

// ksonnet appears to require name be a parameter of the prototype which is why we handle it differently.
local name = params.name;
local experimentName = baseParams.experiment;
local experimentParams = experiments[experimentName];
local params = baseParams + experimentParams + {
  name: "t2t-code-search",

// updatedParams includes the namespace from env by default.
// We can override namespace in params if needed
local updatedParams = env + params;
  // Keep in sync with the TF version used during training.
  image: "tensorflow/serving:1.11.1",
  namespace: env.namespace,

local tfServingBase = import "kubeflow/tf-serving/tf-serving.libsonnet";
local tfServing = tfServingBase {
  // Override parameters with user supplied parameters.
  params+: updatedParams {
    name: name,
  },
  // The TF-Serving component uses the parameter modelBasePath
  modelBasePath: experimentParams.modelBasePath,
};

std.prune(k.core.v1.list.new(tfServing.components))

local deployment = k.apps.v1beta1.deployment;
local container = deployment.mixin.spec.template.spec.containersType;

local util = import "kubeflow/tf-serving/util.libsonnet";
local tfserving = import "kubeflow/tf-serving/tf-serving-template.libsonnet";

local base = tfserving.new(env, params);
local tfDeployment = base.tfDeployment +
  deployment.mixin.spec.template.spec.withVolumesMixin(
    if params.gcpCredentialSecretName != "null" then (
      [{
        name: "gcp-credentials",
        secret: {
          secretName: params.gcpCredentialSecretName,
        },
      }]
    ) else [],
  ) +
  deployment.mapContainers(
    function(c) {
      result::
        c + container.withEnvMixin(
          if params.gcpCredentialSecretName != "null" then (
            [{
              name: "GOOGLE_APPLICATION_CREDENTIALS",
              value: "/secret/gcp-credentials/key.json",
            }]
          ) else [],
        ) +
        container.withVolumeMountsMixin(
          if params.gcpCredentialSecretName != "null" then (
            [{
              name: "gcp-credentials",
              mountPath: "/secret/gcp-credentials",
            }]
          ) else [],
        ),
    }.result,
  );
util.list([
  tfDeployment,
  base.tfService,
],)
@@ -1,5 +1,6 @@
// NOTE: This is only a dummy to allow `ks param set`. DO NOT use.

// TODO(jlewi): We should get rid of this and use experiments.jsonnet and globals
// to define common parameters; see https://github.com/kubeflow/examples/issues/308.
local k = import "k.libsonnet";
local t2tJob = import "t2t-job.libsonnet";
@@ -0,0 +1,60 @@
// @apiVersion 0.1
// @name io.ksonnet.pkg.tf-serving-aws
// @description TensorFlow serving
// @shortDescription A TensorFlow serving deployment
// @param name string Name to give to each of the components
// @optionalParam namespace string kubeflow The namespace
// @optionalParam serviceType string ClusterIP The k8s service type for tf serving.
// @optionalParam numGpus string 0 Number of gpus to use
// @optionalParam deployHttpProxy string false Whether to deploy http proxy
// @optionalParam modelBasePath string gs://kubeflow-examples-data/mnist The model path
// @optionalParam modelName string mnist The model name
// @optionalParam defaultCpuImage string tensorflow/serving:1.8.0 The default model server image (cpu)
// @optionalParam defaultGpuImage string tensorflow/serving:1.10.0-gpu The default model server image (gpu)
// @optionalParam httpProxyImage string gcr.io/kubeflow-images-public/tf-model-server-http-proxy:v20180723 Http proxy image
// @optionalParam s3Enable string false Whether to enable S3
// @optionalParam s3SecretName string null Name of the k8s secrets containing S3 credentials
// @optionalParam s3SecretAccesskeyidKeyName string AWS_ACCESS_KEY_ID Name of the key in the k8s secret containing AWS_ACCESS_KEY_ID
// @optionalParam s3SecretSecretaccesskeyKeyName string AWS_SECRET_ACCESS_KEY Name of the key in the k8s secret containing AWS_SECRET_ACCESS_KEY
// @optionalParam s3AwsRegion string us-west-1 S3 region
// @optionalParam s3UseHttps string true Whether or not to use https
// @optionalParam s3VerifySsl string true Whether or not to verify https certificates for S3 connections
// @optionalParam s3Endpoint string http://s3.us-west-1.amazonaws.com URL for your s3-compatible endpoint

local k = import "k.libsonnet";
local deployment = k.apps.v1beta1.deployment;
local container = deployment.mixin.spec.template.spec.containersType;

local util = import "kubeflow/tf-serving/util.libsonnet";
local tfserving = import "kubeflow/tf-serving/tf-serving-template.libsonnet";

local base = tfserving.new(env, params);
local tfDeployment = base.tfDeployment +
  deployment.mapContainers(
    function(c) {
      result::
        c + container.withEnvMixin(
          if util.toBool(params.s3Enable) then (
            [
              {
                name: "AWS_ACCESS_KEY_ID",
                valueFrom: { secretKeyRef: { name: params.s3SecretName, key: params.s3SecretAccesskeyidKeyName } },
              },
              {
                name: "AWS_SECRET_ACCESS_KEY",
                valueFrom: { secretKeyRef: { name: params.s3SecretName, key: params.s3SecretSecretaccesskeyKeyName } },
              },
              { name: "AWS_REGION", value: params.s3AwsRegion },
              { name: "S3_REGION", value: params.s3AwsRegion },
              { name: "S3_USE_HTTPS", value: params.s3UseHttps },
              { name: "S3_VERIFY_SSL", value: params.s3VerifySsl },
              { name: "S3_ENDPOINT", value: params.s3Endpoint },
            ]
          ) else [],
        ),
    }.result,
  );
util.list([
  tfDeployment,
  base.tfService,
],)
@@ -0,0 +1,60 @@
// @apiVersion 0.1
// @name io.ksonnet.pkg.tf-serving-gcp
// @description TensorFlow serving
// @shortDescription A TensorFlow serving deployment
// @param name string Name to give to each of the components
// @optionalParam namespace string kubeflow The namespace
// @optionalParam serviceType string ClusterIP The k8s service type for tf serving.
// @optionalParam numGpus string 0 Number of gpus to use
// @optionalParam deployHttpProxy string false Whether to deploy http proxy
// @optionalParam modelBasePath string gs://kubeflow-examples-data/mnist The model path
// @optionalParam modelName string mnist The model name
// @optionalParam defaultCpuImage string tensorflow/serving:1.8.0 The default model server image (cpu)
// @optionalParam defaultGpuImage string tensorflow/serving:1.10.0-gpu The default model server image (gpu)
// @optionalParam httpProxyImage string gcr.io/kubeflow-images-public/tf-model-server-http-proxy:v20180723 Http proxy image
// @optionalParam gcpCredentialSecretName string null If not empty, insert the secret credential

local k = import "k.libsonnet";
local deployment = k.apps.v1beta1.deployment;
local container = deployment.mixin.spec.template.spec.containersType;

local util = import "kubeflow/tf-serving/util.libsonnet";
local tfserving = import "kubeflow/tf-serving/tf-serving-template.libsonnet";

local base = tfserving.new(env, params);
local tfDeployment = base.tfDeployment +
  deployment.mixin.spec.template.spec.withVolumesMixin(
    if params.gcpCredentialSecretName != "null" then (
      [{
        name: "gcp-credentials",
        secret: {
          secretName: params.gcpCredentialSecretName,
        },
      }]
    ) else [],
  ) +
  deployment.mapContainers(
    function(c) {
      result::
        c + container.withEnvMixin(
          if params.gcpCredentialSecretName != "null" then (
            [{
              name: "GOOGLE_APPLICATION_CREDENTIALS",
              value: "/secret/gcp-credentials/key.json",
            }]
          ) else [],
        ) +
        container.withVolumeMountsMixin(
          if params.gcpCredentialSecretName != "null" then (
            [{
              name: "gcp-credentials",
              mountPath: "/secret/gcp-credentials",
            }]
          ) else [],
        ),
    }.result,
  );
util.list([
  tfDeployment,
  base.tfService,
],)
@@ -0,0 +1,194 @@
{
  local k = import "k.libsonnet",
  local util = import "kubeflow/tf-serving/util.libsonnet",
  new(_env, _params):: {
    local params = _env + _params {
      namespace: if std.objectHas(_params, "namespace") && _params.namespace != "null" then
        _params.namespace else _env.namespace,
    },
    local namespace = params.namespace,
    local name = params.name,
    local modelServerImage =
      if params.numGpus == "0" then
        params.defaultCpuImage
      else
        params.defaultGpuImage,

    // Optional features.
    // TODO(lunkai): Add Istio
    // TODO(lunkai): Add request logging

    local tfService = {
      apiVersion: "v1",
      kind: "Service",
      metadata: {
        labels: {
          app: name,
        },
        name: name,
        namespace: namespace,
        annotations: {
          "getambassador.io/config":
            std.join("\n", [
              "---",
              "apiVersion: ambassador/v0",
              "kind: Mapping",
              "name: tfserving-mapping-" + name + "-get",
              "prefix: /models/" + name + "/",
              "rewrite: /",
              "method: GET",
              "service: " + name + "." + namespace + ":8000",
              "---",
              "apiVersion: ambassador/v0",
              "kind: Mapping",
              "name: tfserving-mapping-" + name + "-post",
              "prefix: /models/" + name + "/",
              "rewrite: /model/" + name + ":predict",
              "method: POST",
              "service: " + name + "." + namespace + ":8000",
              "---",
              "apiVersion: ambassador/v0",
              "kind: Mapping",
              "name: tfserving-predict-mapping-" + name,
              "prefix: /tfserving/models/" + name + "/",
              "rewrite: /v1/models/" + name + ":predict",
              "method: POST",
              "service: " + name + "." + namespace + ":8500",
            ]),
        },  // annotations
      },
      spec: {
        ports: [
          {
            name: "grpc-tf-serving",
            port: 9000,
            targetPort: 9000,
          },
          {
            name: "http-tf-serving-proxy",
            port: 8000,
            targetPort: 8000,
          },
          {
            name: "tf-serving-builtin-http",
            port: 8500,
            targetPort: 8500,
          },
        ],
        selector: {
          app: name,
        },
        type: params.serviceType,
      },
    },  // tfService
    tfService:: tfService,

    local modelServerContainer = {
      command: [
        "/usr/bin/tensorflow_model_server",
      ],
      args: [
        "--port=9000",
        "--rest_api_port=8500",
        "--model_name=" + params.modelName,
        "--model_base_path=" + params.modelBasePath,
      ],
      image: modelServerImage,
      imagePullPolicy: "IfNotPresent",
      name: name,
      ports: [
        {
          containerPort: 9000,
        },
        {
          containerPort: 8500,
        },
      ],
      env: [],
      resources: {
        limits: {
          cpu: "4",
          memory: "4Gi",
        } + if params.numGpus != "0" then {
          "nvidia.com/gpu": params.numGpus,
        } else {},
        requests: {
          cpu: "1",
          memory: "1Gi",
        },
      },
      volumeMounts: [],
      // TCP liveness probe on gRPC port
      livenessProbe: {
        tcpSocket: {
          port: 9000,
        },
        initialDelaySeconds: 30,
        periodSeconds: 30,
      },
    },  // modelServerContainer

    local httpProxyContainer = {
      name: name + "-http-proxy",
      image: params.httpProxyImage,
      imagePullPolicy: "IfNotPresent",
      command: [
        "python",
        "/usr/src/app/server.py",
        "--port=8000",
        "--rpc_port=9000",
        "--rpc_timeout=10.0",
      ],
      env: [],
      ports: [
        {
          containerPort: 8000,
        },
      ],
      resources: {
        requests: {
          memory: "500Mi",
          cpu: "0.5",
        },
        limits: {
          memory: "1Gi",
          cpu: "1",
        },
      },
      securityContext: {
        runAsUser: 1000,
        fsGroup: 1000,
      },
    },  // httpProxyContainer

    local tfDeployment = {
      apiVersion: "extensions/v1beta1",
      kind: "Deployment",
      metadata: {
        labels: {
          app: name,
        },
        name: name,
        namespace: namespace,
      },
      spec: {
        template: {
          metadata: {
            labels: {
              app: name,
            },
          },
          spec: {
            containers: [
              modelServerContainer,
            ] + if util.toBool(params.deployHttpProxy) then [
              httpProxyContainer,
            ] else [],
            volumes: [],
          },
        },
      },
    },  // tfDeployment
    tfDeployment:: tfDeployment,
  },  // new
}
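Given the Ambassador annotation in the template above, the same predict call can also be made through the cluster's gateway; a hedged sketch (the gateway address is an assumption, while the route, rewrite, and port come from the tfserving-predict mapping above):

```python
import requests

# The annotation above maps POST <gateway>/tfserving/models/<name>/ to
# <name>.<namespace>:8500/v1/models/<name>:predict (TFServing's REST API).
AMBASSADOR = "http://ambassador.kubeflow"  # assumed in-cluster gateway address

def predict_via_gateway(name, instances):
  url = AMBASSADOR + "/tfserving/models/" + name + "/"
  response = requests.post(url, json={"instances": instances})
  response.raise_for_status()
  return response.json()
```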
@@ -26,8 +26,8 @@
    // in which case the image used will still depend on whether GPUs are used or not.
    // Users can also override modelServerImage in which case the user supplied value will always be used
    // regardless of numGpus.
    defaultCpuImage: "gcr.io/kubeflow-images-public/tensorflow-serving-1.7:v20180604-0da89b8a",
    defaultGpuImage: "gcr.io/kubeflow-images-public/tensorflow-serving-1.6gpu:v20180604-0da89b8a",
    defaultCpuImage: "tensorflow/serving:1.8.0",
    defaultGpuImage: "tensorflow/serving:1.10.0-gpu",
    modelServerImage: if $.params.numGpus == 0 then
      $.params.defaultCpuImage
    else
@@ -114,10 +114,11 @@
        name: $.params.name,
        image: $.params.modelServerImage,
        imagePullPolicy: "IfNotPresent",
        args: [
        command: [
          "/usr/bin/tensorflow_model_server",
        ],
        args: [
          "--port=9000",
          "--rest_api_port=9001",
          "--model_name=" + $.params.modelName,
          "--model_base_path=" + $.params.modelPath,
        ],
@@ -125,9 +126,6 @@
          {
            containerPort: 9000,
          },
          {
            containerPort: 9001,
          },
        ],
        // TODO(jlewi): We should add readiness and liveness probes. I think the blocker is that
        // model-server doesn't have something we can use out of the box.
@@ -272,11 +270,6 @@
            port: 9000,
            targetPort: 9000,
          },
          {
            name: "rest-tf-serving",
            port: 9001,
            targetPort: 9001,
          },
          {
            name: "http-tf-serving-proxy",
            port: 8000,
@@ -1,13 +1,6 @@
// Some useful routines.
{
  // Convert a string to upper case.
  upper:: function(x) {
    local cp(c) = std.codepoint(c),
    local upLetter(c) = if cp(c) >= 97 && cp(c) < 123 then
      std.char(cp(c) - 32)
    else c,
    result:: std.join("", std.map(upLetter, std.stringChars(x))),
  }.result,
  local k = import "k.libsonnet",

  // Convert non-boolean types like string, number to a boolean.
  // This is primarily intended for dealing with parameters that should be booleans.
@@ -22,4 +15,7 @@
    else
      false,
  }.result,

  // Produce a list of manifests. obj must be an array.
  list(obj):: k.core.v1.list.new(obj,),
}
@@ -1,9 +1,15 @@
import csv
import logging
import json
import os
import functools
import requests
# TODO(jlewi): We import nmslib at the top as a hack to fix the error
# ImportError: dlopen: cannot load any more object with static TLS.
# We get this error when running inside a docker container. Moving the
# import to the top of the file seems to work around this.
import nmslib  # pylint: disable=unused-import

import csv  # pylint: disable=wrong-import-order
import logging  # pylint: disable=wrong-import-order
import json  # pylint: disable=wrong-import-order
import os  # pylint: disable=wrong-import-order
import functools  # pylint: disable=wrong-import-order
import requests  # pylint: disable=wrong-import-order
import tensorflow as tf

import code_search.nmslib.cli.arguments as arguments
@@ -17,6 +23,7 @@ from code_search.nmslib.search_server import CodeSearchServer
def embed_query(encoder, serving_url, query_str):
  data = {"instances": [{"input": {"b64": encoder(query_str)}}]}

  logging.info("Sending request to: %s", serving_url)
  response = requests.post(url=serving_url,
                           headers={'content-type': 'application/json'},
                           data=json.dumps(data))
@@ -54,14 +61,12 @@ def start_search_server(argv=None):
  Args:
    argv: A list of strings representing command line arguments.
  """
  tf.logging.set_verbosity(tf.logging.INFO)

  args = arguments.parse_arguments(argv)

  if not os.path.isdir(args.tmp_dir):
    os.makedirs(args.tmp_dir)

  tf.logging.debug('Reading {}'.format(args.lookup_file))
  logging.info('Reading %s', args.lookup_file)
  lookup_data = []
  with tf.gfile.Open(args.lookup_file) as lookup_file:
    reader = csv.reader(lookup_file)
@@ -70,7 +75,7 @@ def start_search_server(argv=None):

  tmp_index_file = os.path.join(args.tmp_dir, os.path.basename(args.index_file))

  tf.logging.debug('Reading {}'.format(args.index_file))
  logging.info('Reading %s', args.index_file)
  if not os.path.isfile(tmp_index_file):
    tf.gfile.Copy(args.index_file, tmp_index_file)
@@ -1,3 +1,4 @@
import logging
import nmslib
@@ -24,7 +25,9 @@ class CodeSearchEngine:
    self.embedding_fn = embedding_fn

  def query(self, query_str, k=2):
    logging.info("Embedding query: %s", query_str)
    embedding = self.embedding_fn(query_str)
    logging.info("Calling knn server")
    idxs, dists = self.index.knnQuery(embedding, k=k)

    result = [dict(zip(self.DICT_LABELS, self.lookup_data[id])) for id in idxs]
@@ -1,3 +1,4 @@
import logging
import os
from flask import Flask, request, abort, jsonify, make_response, redirect
@@ -40,6 +41,7 @@ class CodeSearchServer:
    @self.app.route('/query')
    def query():
      query_str = request.args.get('q')
      logging.info("Got query: %s", query_str)
      if not query_str:
        abort(make_response(
            jsonify(status=400, error="empty query"), 400))