Upgrade and fix the serving components. (#348)

* Upgrade and fix the serving components.

* Install a new version of the TFServing package so we can use the new template.

* Fix the UI image. Use the same requirements file as for Dataflow so we are
consistent w.r.t. the versions of TF and Tensor2Tensor.

* Remove nms.libsonnet; move all the manifests into the actual component
  files rather than using a shared library.

* Fix the name of the TFServing service and deployment; they need to use the
  same name as the one used by the front-end server.

* Change the port of TFServing; we are now using the built-in HTTP server
  in TFServing, which uses port 8500, as opposed to our custom HTTP proxy
  (see the first sketch after this list).

* We encountered an error importing nmslib ("ImportError: dlopen: cannot load
  any more object with static TLS"); moving the import to the top of the file
  appears to fix this (see the second sketch after this list).

* Fix lint.
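
For reference, a minimal sketch of querying TF Serving's built-in REST
endpoint. The URL matches the servingUrl parameter set in this commit (the
hostname is the k8s Service name plus namespace, which is why the service
name has to line up with what the front end expects), and the payload shape
mirrors the request built in embed_query; the base64 input is a placeholder:

```python
import base64
import json

import requests

# TF Serving's built-in HTTP server listens on --rest_api_port (8500 here).
# Hostname is the k8s Service name plus namespace: t2t-code-search.kubeflow.
URL = "http://t2t-code-search.kubeflow:8500/v1/models/t2t-code-search:predict"

# The model expects a base64-encoded serialized input; b"..." is a placeholder
# for whatever the query encoder produces.
data = {"instances": [{"input": {"b64": base64.b64encode(b"...").decode()}}]}

response = requests.post(URL,
                         headers={"content-type": "application/json"},
                         data=json.dumps(data))
response.raise_for_status()
print(response.json())
```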
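And a minimal sketch of the nmslib import workaround; the actual change is in
code_search/nmslib/cli/start_search_server.py below:

```python
# Hack: import nmslib before the other imports. When running inside the
# docker container, importing it any later fails with:
#   ImportError: dlopen: cannot load any more object with static TLS
import nmslib  # pylint: disable=unused-import

import tensorflow as tf  # deliberately imported after nmslib
```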
Commit: de17011066 (parent: a630fcea34)
Author: Jeremy Lewi, 2018-11-24 13:22:34 -08:00; committed by k8s-ci-robot
31 changed files with 602 additions and 244 deletions


@@ -61,6 +61,16 @@ build-gcb:
	gcloud builds submit --machine-type=n1-highcpu-32 --project=kubeflow-ci --config=./build/build.json \
		--timeout=3600 ./build

build-ui-gcb:
	mkdir -p build
	jsonnet ./docker/ui/build.jsonnet --ext-str gitVersion=$(GIT_VERSION) --ext-str tag=$(TAG) \
		> ./build/build.ui.json
	cp -r ./docker ./build/
	cp -r ./src ./build/
	rm -rf ./build/src/code_search/dataflow/cli/test_data
	rm -rf ./build/src/code_search/t2t/test_data
	gcloud builds submit --machine-type=n1-highcpu-32 --project=kubeflow-ci --config=./build/build.ui.json \
		--timeout=3600 ./build
# Build but don't attach the latest tag. This allows manual testing/inspection of the image
# first.


@@ -13,6 +13,21 @@ We are using the following project
* **project**: code-search-demo
* **[code-search-team@kubeflow.org](https://github.com/kubeflow/internal-acls/blob/master/code-search-team.members.txt)** Google group administering access
# Deploying the services
1. Deploy the TFServing server
```
ks12 apply cs_demo -c t2t-code-search-serving
```
1. Deploy the UI and nmslib index server
```
ks12 apply cs_demo -c search-index-server
```
# Results
## 2018-11-05
@@ -41,12 +56,3 @@ jlewi@ ran experiments that produced the following results
| transformer_base_single_gpu | 1 GPU worker (K80) | ~3.22611 global step /sec
| transformer_base | 1 chief with K80, 8 workers with 1 K80, sync training| ~ 0.0588723 global step /sec
| transformer_base | 1 chief (no GPU), 8 workers (no GPU), sync training| ~ 0.707014 global step /sec


@@ -1,17 +1,17 @@
ARG BASE_IMAGE_TAG=1.8.0
FROM python:2.7-jessie
FROM tensorflow/tensorflow:$BASE_IMAGE_TAG
RUN apt-get update && apt-get install -y curl &&\
rm -rf /var/lib/apt/lists/*
RUN curl -sL https://deb.nodesource.com/setup_10.x | bash - &&\
apt-get install -y nodejs &&\
pip install Flask~=1.0.0 \
nmslib~=1.7.0 \
numpy~=1.14.0 \
oauth2client~=4.1.0 \
requests~=2.18.0 \
tensor2tensor~=1.8.0 &&\
apt-get install -y nodejs && \
rm -rf /var/lib/apt/lists/*
COPY src/requirements.ui.txt /tmp/requirements.ui.txt
COPY src/requirements.nmslib.txt /tmp/requirements.nmslib.txt
RUN pip install -r /tmp/requirements.ui.txt
RUN pip install -r /tmp/requirements.nmslib.txt
ADD src/ /src
WORKDIR /src


@@ -0,0 +1,26 @@
// TODO(jlewi): We should tag the image latest and then
// use latest as a cache so that rebuilds are fast
// https://cloud.google.com/cloud-build/docs/speeding-up-builds#using_a_cached_docker_image
{
"steps": [
{
"id": "build-ui",
"name": "gcr.io/cloud-builders/docker",
"args": ["build", "-t", "gcr.io/kubeflow-examples/code-search-ui:" + std.extVar("tag"),
"--label=git-versions=" + std.extVar("gitVersion"),
"--file=docker/ui/Dockerfile",
"."],
},
{
"id": "tag-ui",
"name": "gcr.io/cloud-builders/docker",
"args": ["tag", "gcr.io/kubeflow-examples/code-search-ui:" + std.extVar("tag"),
"gcr.io/kubeflow-examples/code-search-ui:latest",],
"waitFor": ["build-ui"],
},
],
"images": ["gcr.io/kubeflow-examples/code-search-ui:" + std.extVar("tag"),
"gcr.io/kubeflow-examples/code-search-ui:latest",
],
}


@@ -11,11 +11,11 @@ libraries:
examples:
name: examples
registry: kubeflow
version: defc235463799d5600001ee0ed6ef68f7af24a17
version: dddba26893846041b7cee25ee6121e02a04bd503
tf-serving:
name: tf-serving
registry: kubeflow
version: ab6084349673e6405ae486eb3be2141e3550643c
version: dddba26893846041b7cee25ee6121e02a04bd503
name: kubeflow
registries:
incubator:
@@ -23,5 +23,5 @@ registries:
uri: github.com/ksonnet/parts/tree/master/incubator
kubeflow:
protocol: github
uri: https://github.com/kubeflow/kubeflow/tree/master/kubeflow
uri: github.com/kubeflow/kubeflow/tree/v0.3-branch/kubeflow
version: 0.0.1


@@ -8,7 +8,11 @@
eval_steps: 100,
hparams_set: "transformer_base",
project: "code-search-demo",
modelDir: "gs://code-search-demo/models/20181107-dist-sync-gpu/export/1541712907/",
// modelBasePath shouldn't have the integer version directory in it.
modelBasePath: "gs://code-search-demo/models/20181107-dist-sync-gpu/export/",
problem: "kf_github_function_docstring",
model: "kf_similarity_transformer",


@@ -1,167 +0,0 @@
local baseParams = std.extVar("__ksonnet/params").components["nmslib"];
{
deploymentSpec(params, env, containers, volumes=[]):: {
apiVersion: "extensions/v1beta1",
kind: "Deployment",
metadata: {
name: params.name,
namespace: env.namespace,
labels: {
app: params.name,
}
},
spec: {
replicas: params.replicas,
selector: {
matchLabels: {
app: params.name,
},
},
template: {
metadata: {
labels: {
app: params.name,
}
},
spec: {
containers: containers,
volumes: volumes,
},
},
},
},
jobSpec(params, env, containers, volumes=[]):: {
apiVersion: "batch/v1",
kind: "Job",
metadata: {
name: params.name,
namespace: env.namespace,
labels: {
app: params.name,
}
},
spec: {
replicas: params.replicas,
template: {
metadata: {
labels: {
app: params.name,
}
},
spec: {
"restartPolicy": "OnFailure",
containers: containers,
volumes: volumes,
},
},
},
},
containerSpec(params, env=[], volumeMounts=[], ports=[]):: {
name: params.name,
image: params.image,
command: params.command,
ports: ports,
env: env,
volumeMounts: volumeMounts,
},
service(params, env):: {
apiVersion: "v1",
kind: "Service",
metadata: {
labels: {
app: params.name,
},
name: params.name,
namespace: env.namespace,
annotations: {
"getambassador.io/config":
std.join("\n", [
"---",
"apiVersion: ambassador/v0",
"kind: Mapping",
"name: http-mapping-" + params.name,
"prefix: /code-search/",
"rewrite: /",
"method: GET",
"service: " + params.name + "." + env.namespace + ":8008",
]),
},
},
spec: {
type: "ClusterIP",
selector: {
app: params.name,
},
ports: [
{
name: "nmslib-serve-http",
port: 8008,
targetPort: 8008,
},
],
},
},
parts(newParams, env):: {
local params = baseParams + newParams,
local volumes = [
{
name: "gcp-credentials",
secret: {
secretName: "user-gcp-sa",
},
},
],
local containerEnv = [
{
name: "GOOGLE_APPLICATION_CREDENTIALS",
value: "/secret/gcp-credentials/user-gcp-sa.json",
}
],
local containerVolumeMounts = [
{
mountPath: "/secret/gcp-credentials",
name: "gcp-credentials",
},
],
server:: {
local serverParams = params + {
command: [
"python",
"-m",
"code_search.nmslib.cli.start_search_server",
"--problem=" + params.problem,
"--data_dir=" + params.dataDir,
"--lookup_file=" + params.lookupFile,
"--index_file=" + params.indexFile,
"--serving_url=" + params.servingUrl,
],
},
local containerPorts = [
{
containerPort: 8008,
}
],
all: [
$.service(serverParams, env),
$.deploymentSpec(serverParams, env,
[
$.containerSpec(serverParams, env=containerEnv,
volumeMounts=containerVolumeMounts,
ports=containerPorts)
],
volumes=volumes),
],
}.all,
}
}


@@ -21,7 +21,6 @@
image: "gcr.io/kubeflow-examples/code-search:" + imageTag,
imageGpu: "gcr.io/kubeflow-examples/code-search-gpu:" + imageTag,
dataflowImage: "gcr.io/kubeflow-examples/code-search-dataflow:" + imageTag,
imagePullSecrets: [],
// TODO(jlewi): dataDir doesn't seem to be used.
dataDir: "null",
@@ -63,12 +62,17 @@
image: $.components["t2t-job"].image,
},
"t2t-code-search-serving": {
name: "t2t-code-search",
modelName: "t2t-code-search",
modelPath: $.components["t2t-code-search"].workingDir + "/output/export/Servo",
modelServerImage: "gcr.io/kubeflow-images-public/tensorflow-serving-1.8:latest",
cloud: "gcp",
name: "tf-serving",
gcpCredentialSecretName: "user-gcp-sa",
serviceType: "ClusterIP",
deployHttpProxy: false,
modelBasePath: "gs://some/model",
// modelName is used by the client.
modelName: "t2t-code-search",
defaultCpuImage: "tensorflow/serving:1.11.1",
defaultGpuImage: "tensorflow/serving:1.11.1-gpu",
httpProxyImage: "gcr.io/kubeflow-images-public/tf-model-server-http-proxy:v20180723",
numGpus: "0",
},
nmslib: {
replicas: 1,
@@ -91,7 +95,10 @@
dataDir: $.components["t2t-code-search"].workingDir + "/data",
lookupFile: $.components["t2t-code-search"].workingDir + "/code_search_index.csv",
indexFile: $.components["t2t-code-search"].workingDir + "/code_search_index.nmslib",
servingUrl: "http://t2t-code-search.kubeflow:9001/v1/models/t2t-code-search:predict",
servingUrl: "http://t2t-code-search.kubeflow:8500/v1/models/t2t-code-search:predict",
// 1 replica is convenient for debugging but we should bump after debugging.
replicas: 1,
image: "gcr.io/kubeflow-examples/code-search-ui:v20181122-dc0e646-dirty-043a63",
},
"submit-preprocess-job": {
name: "submit-preprocess-job",


@@ -1,5 +1,4 @@
local k = import "k.libsonnet";
local nms = import "nms.libsonnet";
local env = std.extVar("__ksonnet/environments");
local params = std.extVar("__ksonnet/params").components["search-index-creator"];


@@ -1,7 +1,122 @@
local k = import "k.libsonnet";
local nms = import "nms.libsonnet";
local env = std.extVar("__ksonnet/environments");
local params = std.extVar("__ksonnet/params").components["search-index-server"];
local baseParams = std.extVar("__ksonnet/params").components["search-index-server"];
std.prune(k.core.v1.list.new(nms.parts(params, env).server))
local experiments = import "experiments.libsonnet";
local experimentName = baseParams.experiment;
local experimentParams = experiments[experimentName];
local params = baseParams + experimentParams + {
name: "search-index-server",
};
local deploymentSpec = {
apiVersion: "extensions/v1beta1",
kind: "Deployment",
metadata: {
name: params.name,
namespace: env.namespace,
labels: {
app: params.name,
},
},
spec: {
replicas: params.replicas,
selector: {
matchLabels: {
app: params.name,
},
},
template: {
metadata: {
labels: {
app: params.name,
},
},
spec: {
containers: [
{
name: params.name,
image: params.image,
command: [
"python",
"-m",
"code_search.nmslib.cli.start_search_server",
"--problem=" + params.problem,
"--data_dir=" + params.dataDir,
"--lookup_file=" + params.lookupFile,
"--index_file=" + params.indexFile,
"--serving_url=" + params.servingUrl,
],
ports: [
{
containerPort: 8008,
},
],
env: [
{
name: "GOOGLE_APPLICATION_CREDENTIALS",
value: "/secret/gcp-credentials/user-gcp-sa.json",
},
],
volumeMounts: [
{
mountPath: "/secret/gcp-credentials",
name: "gcp-credentials",
},
],
},
],
volumes: [
{
name: "gcp-credentials",
secret: {
secretName: "user-gcp-sa",
},
},
],
},
},
}, // spec
};
local service = {
apiVersion: "v1",
kind: "Service",
metadata: {
labels: {
app: params.name,
},
name: params.name,
namespace: env.namespace,
annotations: {
"getambassador.io/config":
std.join("\n", [
"---",
"apiVersion: ambassador/v0",
"kind: Mapping",
"name: http-mapping-" + params.name,
"prefix: /code-search/",
"rewrite: /",
"method: GET",
"service: " + params.name + "." + env.namespace + ":8008",
]),
},
},
spec: {
type: "ClusterIP",
selector: {
app: params.name,
},
ports: [
{
name: "nmslib-serve-http",
port: 8008,
targetPort: 8008,
},
],
},
};
std.prune(k.core.v1.list.new([deploymentSpec, service]))


@@ -1,21 +1,65 @@
local env = std.extVar("__ksonnet/environments");
local params = std.extVar("__ksonnet/params").components["t2t-code-search-serving"];
local baseParams = std.extVar("__ksonnet/params").components["t2t-code-search-serving"];
local experiments = import "experiments.libsonnet";
local k = import "k.libsonnet";
// ksonnet appears to require name be a parameter of the prototype which is why we handle it differently.
local name = params.name;
local experimentName = baseParams.experiment;
local experimentParams= experiments[experimentName];
local params = baseParams + experimentParams + {
name: "t2t-code-search",
// updatedParams includes the namespace from env by default.
// We can override namespace in params if needed
local updatedParams = env + params;
// Keep in sync with the TF version used during training.
image: "tensorflow/serving:1.11.1",
namespace: env.namespace,
local tfServingBase = import "kubeflow/tf-serving/tf-serving.libsonnet";
local tfServing = tfServingBase {
// Override parameters with user supplied parameters.
params+: updatedParams {
name: name,
},
// The TF-Serving component uses the parameter modelBasePath
modelBasePath: experimentParams.modelBasePath,
};
std.prune(k.core.v1.list.new(tfServing.components))
local deployment = k.apps.v1beta1.deployment;
local container = deployment.mixin.spec.template.spec.containersType;
local util = import "kubeflow/tf-serving/util.libsonnet";
local tfserving = import "kubeflow/tf-serving/tf-serving-template.libsonnet";
local base = tfserving.new(env, params);
local tfDeployment = base.tfDeployment +
deployment.mixin.spec.template.spec.withVolumesMixin(
if params.gcpCredentialSecretName != "null" then (
[{
name: "gcp-credentials",
secret: {
secretName: params.gcpCredentialSecretName,
},
}]
) else [],
)+
deployment.mapContainers(
function(c) {
result::
c + container.withEnvMixin(
if params.gcpCredentialSecretName != "null" then (
[{
name: "GOOGLE_APPLICATION_CREDENTIALS",
value: "/secret/gcp-credentials/key.json",
}]
) else [],
) +
container.withVolumeMountsMixin(
if params.gcpCredentialSecretName != "null" then (
[{
name: "gcp-credentials",
mountPath: "/secret/gcp-credentials",
}]
) else [],
),
}.result,
);
util.list([
tfDeployment,
base.tfService,
],)


@@ -1,5 +1,6 @@
// NOTE: This is only a dummy to allow `ks param set`. DO NOT use.
// TODO(jlewi): We should get rid of this and use experiments.jsonnet and globals
// to define common parameters; see https://github.com/kubeflow/examples/issues/308.
local k = import "k.libsonnet";
local t2tJob = import "t2t-job.libsonnet";


@@ -0,0 +1,60 @@
// @apiVersion 0.1
// @name io.ksonnet.pkg.tf-serving-aws
// @description TensorFlow serving
// @shortDescription A TensorFlow serving deployment
// @param name string Name to give to each of the components
// @optionalParam namespace string kubeflow The namespace
// @optionalParam serviceType string ClusterIP The k8s service type for tf serving.
// @optionalParam numGpus string 0 Number of gpus to use
// @optionalParam deployHttpProxy string false Whether to deploy http proxy
// @optionalParam modelBasePath string gs://kubeflow-examples-data/mnist The model path
// @optionalParam modelName string mnist The model name
// @optionalParam defaultCpuImage string tensorflow/serving:1.8.0 The default model server image (cpu)
// @optionalParam defaultGpuImage string tensorflow/serving:1.10.0-gpu The default model server image (gpu)
// @optionalParam httpProxyImage string gcr.io/kubeflow-images-public/tf-model-server-http-proxy:v20180723 Http proxy image
// @optionalParam s3Enable string false Whether to enable S3
// @optionalParam s3SecretName string null Name of the k8s secrets containing S3 credentials
// @optionalParam s3SecretAccesskeyidKeyName string AWS_ACCESS_KEY_ID Name of the key in the k8s secret containing AWS_ACCESS_KEY_ID
// @optionalParam s3SecretSecretaccesskeyKeyName string AWS_SECRET_ACCESS_KEY Name of the key in the k8s secret containing AWS_SECRET_ACCESS_KEY
// @optionalParam s3AwsRegion string us-west-1 S3 region
// @optionalParam s3UseHttps string true Whether or not to use https
// @optionalParam s3VerifySsl string true Whether or not to verify https certificates for S3 connections
// @optionalParam s3Endpoint string http://s3.us-west-1.amazonaws.com URL for your s3-compatible endpoint
local k = import "k.libsonnet";
local deployment = k.apps.v1beta1.deployment;
local container = deployment.mixin.spec.template.spec.containersType;
local util = import "kubeflow/tf-serving/util.libsonnet";
local tfserving = import "kubeflow/tf-serving/tf-serving-template.libsonnet";
local base = tfserving.new(env, params);
local tfDeployment = base.tfDeployment +
deployment.mapContainers(
function(c) {
result::
c + container.withEnvMixin(
if util.toBool(params.s3Enable) then (
[
{
name: "AWS_ACCESS_KEY_ID",
valueFrom: { secretKeyRef: { name: params.s3SecretName, key: params.s3SecretAccesskeyidKeyName } },
},
{
name: "AWS_SECRET_ACCESS_KEY",
valueFrom: { secretKeyRef: { name: params.s3SecretName, key: params.s3SecretSecretaccesskeyKeyName } },
},
{ name: "AWS_REGION", value: params.s3AwsRegion },
{ name: "S3_REGION", value: params.s3AwsRegion },
{ name: "S3_USE_HTTPS", value: params.s3UseHttps },
{ name: "S3_VERIFY_SSL", value: params.s3VerifySsl },
{ name: "S3_ENDPOINT", value: params.s3Endpoint },
]
) else [],
),
}.result,
);
util.list([
tfDeployment,
base.tfService,
],)


@@ -0,0 +1,60 @@
// @apiVersion 0.1
// @name io.ksonnet.pkg.tf-serving-gcp
// @description TensorFlow serving
// @shortDescription A TensorFlow serving deployment
// @param name string Name to give to each of the components
// @optionalParam namespace string kubeflow The namespace
// @optionalParam serviceType string ClusterIP The k8s service type for tf serving.
// @optionalParam numGpus string 0 Number of gpus to use
// @optionalParam deployHttpProxy string false Whether to deploy http proxy
// @optionalParam modelBasePath string gs://kubeflow-examples-data/mnist The model path
// @optionalParam modelName string mnist The model name
// @optionalParam defaultCpuImage string tensorflow/serving:1.8.0 The default model server image (cpu)
// @optionalParam defaultGpuImage string tensorflow/serving:1.10.0-gpu The default model server image (gpu)
// @optionalParam httpProxyImage string gcr.io/kubeflow-images-public/tf-model-server-http-proxy:v20180723 Http proxy image
// @optionalParam gcpCredentialSecretName string null If not empty, insert the secret credential
local k = import "k.libsonnet";
local deployment = k.apps.v1beta1.deployment;
local container = deployment.mixin.spec.template.spec.containersType;
local util = import "kubeflow/tf-serving/util.libsonnet";
local tfserving = import "kubeflow/tf-serving/tf-serving-template.libsonnet";
local base = tfserving.new(env, params);
local tfDeployment = base.tfDeployment +
deployment.mixin.spec.template.spec.withVolumesMixin(
if params.gcpCredentialSecretName != "null" then (
[{
name: "gcp-credentials",
secret: {
secretName: params.gcpCredentialSecretName,
},
}]
) else [],
) +
deployment.mapContainers(
function(c) {
result::
c + container.withEnvMixin(
if params.gcpCredentialSecretName != "null" then (
[{
name: "GOOGLE_APPLICATION_CREDENTIALS",
value: "/secret/gcp-credentials/key.json",
}]
) else [],
) +
container.withVolumeMountsMixin(
if params.gcpCredentialSecretName != "null" then (
[{
name: "gcp-credentials",
mountPath: "/secret/gcp-credentials",
}]
) else [],
),
}.result,
);
util.list([
tfDeployment,
base.tfService,
],)


@@ -0,0 +1,194 @@
{
local k = import "k.libsonnet",
local util = import "kubeflow/tf-serving/util.libsonnet",
new(_env, _params):: {
local params = _env + _params {
namespace: if std.objectHas(_params, "namespace") && _params.namespace != "null" then
_params.namespace else _env.namespace,
},
local namespace = params.namespace,
local name = params.name,
local modelServerImage =
if params.numGpus == "0" then
params.defaultCpuImage
else
params.defaultGpuImage,
// Optional features.
// TODO(lunkai): Add Istio
// TODO(lunkai): Add request logging
local tfService = {
apiVersion: "v1",
kind: "Service",
metadata: {
labels: {
app: name,
},
name: name,
namespace: namespace,
annotations: {
"getambassador.io/config":
std.join("\n", [
"---",
"apiVersion: ambassador/v0",
"kind: Mapping",
"name: tfserving-mapping-" + name + "-get",
"prefix: /models/" + name + "/",
"rewrite: /",
"method: GET",
"service: " + name + "." + namespace + ":8000",
"---",
"apiVersion: ambassador/v0",
"kind: Mapping",
"name: tfserving-mapping-" + name + "-post",
"prefix: /models/" + name + "/",
"rewrite: /model/" + name + ":predict",
"method: POST",
"service: " + name + "." + namespace + ":8000",
"---",
"apiVersion: ambassador/v0",
"kind: Mapping",
"name: tfserving-predict-mapping-" + name,
"prefix: tfserving/models/" + name + "/",
"rewrite: /v1/models/" + name + ":predict",
"method: POST",
"service: " + name + "." + namespace + ":8500",
]),
}, //annotations
},
spec: {
ports: [
{
name: "grpc-tf-serving",
port: 9000,
targetPort: 9000,
},
{
name: "http-tf-serving-proxy",
port: 8000,
targetPort: 8000,
},
{
name: "tf-serving-builtin-http",
port: 8500,
targetPort: 8500,
},
],
selector: {
app: name,
},
type: params.serviceType,
},
}, // tfService
tfService:: tfService,
local modelServerContainer = {
command: [
"/usr/bin/tensorflow_model_server",
],
args: [
"--port=9000",
"--rest_api_port=8500",
"--model_name=" + params.modelName,
"--model_base_path=" + params.modelBasePath,
],
image: modelServerImage,
imagePullPolicy: "IfNotPresent",
name: name,
ports: [
{
containerPort: 9000,
},
{
containerPort: 8500,
},
],
env: [],
resources: {
limits: {
cpu: "4",
memory: "4Gi",
} + if params.numGpus != "0" then {
"nvidia.com/gpu": params.numGpus,
} else {},
requests: {
cpu: "1",
memory: "1Gi",
},
},
volumeMounts: [],
// TCP liveness probe on gRPC port
livenessProbe: {
tcpSocket: {
port: 9000,
},
initialDelaySeconds: 30,
periodSeconds: 30,
},
}, // modelServerContainer
local httpProxyContainer = {
name: name + "-http-proxy",
image: params.httpProxyImage,
imagePullPolicy: "IfNotPresent",
command: [
"python",
"/usr/src/app/server.py",
"--port=8000",
"--rpc_port=9000",
"--rpc_timeout=10.0",
],
env: [],
ports: [
{
containerPort: 8000,
},
],
resources: {
requests: {
memory: "500Mi",
cpu: "0.5",
},
limits: {
memory: "1Gi",
cpu: "1",
},
},
securityContext: {
runAsUser: 1000,
fsGroup: 1000,
},
}, // httpProxyContainer
local tfDeployment = {
apiVersion: "extensions/v1beta1",
kind: "Deployment",
metadata: {
labels: {
app: name,
},
name: name,
namespace: namespace,
},
spec: {
template: {
metadata: {
labels: {
app: name,
},
},
spec: {
containers: [
modelServerContainer,
] + if util.toBool(params.deployHttpProxy) then [
httpProxyContainer,
] else [],
volumes: [],
},
},
},
}, // tfDeployment
tfDeployment:: tfDeployment,
}, // new
}


@@ -26,8 +26,8 @@
// in which case the image used will still depend on whether GPUs are used or not.
// Users can also override modelServerImage in which case the user supplied value will always be used
// regardless of numGpus.
defaultCpuImage: "gcr.io/kubeflow-images-public/tensorflow-serving-1.7:v20180604-0da89b8a",
defaultGpuImage: "gcr.io/kubeflow-images-public/tensorflow-serving-1.6gpu:v20180604-0da89b8a",
defaultCpuImage: "tensorflow/serving:1.8.0",
defaultGpuImage: "tensorflow/serving:1.10.0-gpu",
modelServerImage: if $.params.numGpus == 0 then
$.params.defaultCpuImage
else
@@ -114,10 +114,11 @@
name: $.params.name,
image: $.params.modelServerImage,
imagePullPolicy: "IfNotPresent",
args: [
command: [
"/usr/bin/tensorflow_model_server",
],
args: [
"--port=9000",
"--rest_api_port=9001",
"--model_name=" + $.params.modelName,
"--model_base_path=" + $.params.modelPath,
],
@@ -125,9 +126,6 @@
{
containerPort: 9000,
},
{
containerPort: 9001,
},
],
// TODO(jlewi): We should add readiness and liveness probes. I think the blocker is that
// model-server doesn't have something we can use out of the box.
@@ -272,11 +270,6 @@
port: 9000,
targetPort: 9000,
},
{
name: "rest-tf-serving",
port: 9001,
targetPort: 9001,
},
{
name: "http-tf-serving-proxy",
port: 8000,


@@ -1,13 +1,6 @@
// Some useful routines.
{
// Convert a string to upper case.
upper:: function(x) {
local cp(c) = std.codepoint(c),
local upLetter(c) = if cp(c) >= 97 && cp(c) < 123 then
std.char(cp(c) - 32)
else c,
result:: std.join("", std.map(upLetter, std.stringChars(x))),
}.result,
local k = import "k.libsonnet",
// Convert non-boolean types like string,number to a boolean.
// This is primarily intended for dealing with parameters that should be booleans.
@@ -22,4 +15,7 @@
else
false,
}.result,
// Produce a list of manifests. obj must be an array
list(obj):: k.core.v1.list.new(obj,),
}


@@ -1,9 +1,15 @@
import csv
import logging
import json
import os
import functools
import requests
# TODO(jlewi): We import nmslib at the top as a hack to fix the error
# ImportError: dlopen: cannot load any more object with static TLS.
# We get this error when running inside a docker container. Moving the
# import to the top of the file seems to work around this.
import nmslib # pylint: disable=unused-import
import csv # pylint: disable=wrong-import-order
import logging # pylint: disable=wrong-import-order
import json # pylint: disable=wrong-import-order
import os # pylint: disable=wrong-import-order
import functools # pylint: disable=wrong-import-order
import requests # pylint: disable=wrong-import-order
import tensorflow as tf
import code_search.nmslib.cli.arguments as arguments
@@ -17,6 +23,7 @@ from code_search.nmslib.search_server import CodeSearchServer
def embed_query(encoder, serving_url, query_str):
  data = {"instances": [{"input": {"b64": encoder(query_str)}}]}

  logging.info("Sending request to: %s", serving_url)
  response = requests.post(url=serving_url,
                           headers={'content-type': 'application/json'},
                           data=json.dumps(data))
@@ -54,14 +61,12 @@ def start_search_server(argv=None):
  Args:
    argv: A list of strings representing command line arguments.
  """
  tf.logging.set_verbosity(tf.logging.INFO)
  args = arguments.parse_arguments(argv)

  if not os.path.isdir(args.tmp_dir):
    os.makedirs(args.tmp_dir)

  tf.logging.debug('Reading {}'.format(args.lookup_file))
  logging.info('Reading %s', args.lookup_file)
  lookup_data = []
  with tf.gfile.Open(args.lookup_file) as lookup_file:
    reader = csv.reader(lookup_file)
@@ -70,7 +75,7 @@ def start_search_server(argv=None):
  tmp_index_file = os.path.join(args.tmp_dir, os.path.basename(args.index_file))

  tf.logging.debug('Reading {}'.format(args.index_file))
  logging.info('Reading %s', args.index_file)
  if not os.path.isfile(tmp_index_file):
    tf.gfile.Copy(args.index_file, tmp_index_file)


@@ -1,3 +1,4 @@
import logging
import nmslib
@@ -24,7 +25,9 @@ class CodeSearchEngine:
    self.embedding_fn = embedding_fn

  def query(self, query_str, k=2):
    logging.info("Embedding query: %s", query_str)
    embedding = self.embedding_fn(query_str)
    logging.info("Calling knn server")
    idxs, dists = self.index.knnQuery(embedding, k=k)

    result = [dict(zip(self.DICT_LABELS, self.lookup_data[id])) for id in idxs]


@@ -1,3 +1,4 @@
import logging
import os
from flask import Flask, request, abort, jsonify, make_response, redirect
@@ -40,6 +41,7 @@ class CodeSearchServer:
    @self.app.route('/query')
    def query():
      query_str = request.args.get('q')
      logging.info("Got query: %s", query_str)
      if not query_str:
        abort(make_response(
          jsonify(status=400, error="empty query"), 400))