Add new TF-Serving component with sample task (#152)

* Add new TF-Serving component with sample task

* Unify the nmslib and t2t packages into a single cohesive package

* [WIP] update references to the package

* Replace old T2T problem

* Add representative code for encoding requests to, and decoding responses from, the TF Serving service

* Add rest API port to TF serving (replaces custom http proxy)

* Fix linting

* Add NMSLib creator and server components

* Add docs to CLI module
Sanyam Kapoor 2018-06-28 20:37:21 -07:00 committed by k8s-ci-robot
parent 11b75edfd9
commit c1b2802313
33 changed files with 964 additions and 180 deletions

View File

@ -86,11 +86,11 @@ $ gcloud auth configure-docker
* Build and push the image
```
$ PROJECT=my-project ./language_task/build_image.sh
$ PROJECT=my-project ./build_image.sh
```
and a GPU image
```
$ GPU=1 PROJECT=my-project ./language_task/build_image.sh
$ GPU=1 PROJECT=my-project ./build_image.sh
```
See [GCR Pushing and Pulling Images](https://cloud.google.com/container-registry/docs/pushing-and-pulling) for more.

View File

@ -1,28 +1,27 @@
# NOTE: The context for this build must be the `language_task` directory
# NOTE: The context for this build must be the `app` directory
ARG BASE_IMAGE_TAG=1.8.0-py3
FROM tensorflow/tensorflow:$BASE_IMAGE_TAG
ADD requirements.txt /
ADD . /app
RUN pip3 --no-cache-dir install -r /requirements.txt &&\
WORKDIR /app
ENV T2T_USR_DIR=/app/code_search/t2t
RUN pip3 --no-cache-dir install . &&\
apt-get update && apt-get install -y jq &&\
rm -rf /var/lib/apt/lists/*
VOLUME ["/data", "/output"]
ADD t2t_problems/* /t2t_problems/
ADD t2t-entrypoint.sh /usr/local/sbin/t2t-entrypoint
ENV T2T_USR_DIR=/t2t_problems
WORKDIR /t2t_problems
#ENTRYPOINT ["/usr/local/sbin/t2t-entrypoint"]
rm -rf /var/lib/apt/lists/* &&\
ln -s /app/t2t-entrypoint.sh /usr/local/sbin/t2t-entrypoint
# TODO(sanyamkapoor): A workaround for tensorflow/tensor2tensor#879
RUN apt-get update && apt-get install -y curl python &&\
curl https://sdk.cloud.google.com | bash &&\
rm -rf /var/lib/apt/lists/*
VOLUME ["/data", "/output"]
EXPOSE 8008
ENTRYPOINT ["bash"]

View File

@ -0,0 +1,90 @@
"""
This module serves as the entrypoint to either create an nmslib index or
start a Flask server to serve the index via a simple REST interface. It
internally talks to TF Serving for inference-related tasks. The
two entrypoints `server` and `creator` are exposed as `nmslib-create`
and `nmslib-serve` binaries (see `setup.py`). Use `-h` to get a list
of input CLI arguments to both.
"""
import sys
import os
import argparse
import numpy as np
from code_search.nmslib.gcs import maybe_download_gcs_file, maybe_upload_gcs_file
from code_search.nmslib.search_engine import CodeSearchEngine
from code_search.nmslib.search_server import CodeSearchServer
def parse_server_args(args):
parser = argparse.ArgumentParser(prog='nmslib Flask Server')
parser.add_argument('--tmp-dir', type=str, metavar='', default='/tmp/nmslib',
help='Path to temporary data directory')
parser.add_argument('--index-file', type=str, required=True,
help='Path to index file created by nmslib')
parser.add_argument('--problem', type=str, required=True,
help='Name of the T2T problem')
parser.add_argument('--data-dir', type=str, metavar='', default='/tmp',
help='Path to working data directory')
parser.add_argument('--serving-url', type=str, required=True,
help='Complete URL to TF Serving Inference server')
parser.add_argument('--host', type=str, metavar='', default='0.0.0.0',
help='Host to start server on')
parser.add_argument('--port', type=int, metavar='', default=8008,
help='Port to bind server to')
args = parser.parse_args(args)
args.tmp_dir = os.path.expanduser(args.tmp_dir)
args.index_file = os.path.expanduser(args.index_file)
args.data_dir = os.path.expanduser(args.data_dir)
return args
def parse_creator_args(args):
parser = argparse.ArgumentParser(prog='nmslib Index Creator')
parser.add_argument('--data-file', type=str, required=True,
help='Path to csv data file for human-readable data')
parser.add_argument('--index-file', type=str, metavar='', default='/tmp/index.nmslib',
help='Path to output index file')
parser.add_argument('--tmp-dir', type=str, metavar='', default='/tmp/nmslib',
help='Path to temporary data directory')
return parser.parse_args(args)
def server():
args = parse_server_args(sys.argv[1:])
if not os.path.isdir(args.tmp_dir):
os.makedirs(args.tmp_dir, exist_ok=True)
# Download relevant files if needed
index_file = maybe_download_gcs_file(args.index_file, args.tmp_dir)
search_engine = CodeSearchEngine(args.problem, args.data_dir, args.serving_url,
index_file)
search_server = CodeSearchServer(engine=search_engine,
host=args.host, port=args.port)
search_server.run()
def creator():
args = parse_creator_args(sys.argv[1:])
if not os.path.isdir(args.tmp_dir):
os.makedirs(args.tmp_dir, exist_ok=True)
data_file = maybe_download_gcs_file(args.data_file, args.tmp_dir)
# TODO(sanyamkapoor): parse data file into a numpy array
data = np.load(data_file)
tmp_index_file = os.path.join(args.tmp_dir, os.path.basename(args.index_file))
CodeSearchEngine.create_index(data, tmp_index_file)
maybe_upload_gcs_file(tmp_index_file, args.index_file)
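
For reference, a minimal sketch of driving the argument parser above from Python; every flag value below is a hypothetical placeholder, only the flag names come from this module:

```python
# Minimal sketch: exercise the server argument parser directly. All flag
# values are hypothetical placeholders; only the flag names come from cli.py.
from code_search.nmslib.cli import parse_server_args

args = parse_server_args([
    "--index-file=gs://my-bucket/index.nmslib",  # hypothetical GCS path
    "--problem=github_function_docstring",       # hypothetical T2T problem name
    "--serving-url=http://t2t-serving:8000/v1/models/t2t-code-search:predict",
    "--port=8008",
])
print(args.index_file, args.problem, args.port)
```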

View File

@ -0,0 +1,69 @@
import json
import requests
import nmslib
import numpy as np
from tensor2tensor import problems # pylint: disable=unused-import
from code_search.t2t.query import get_encoder_decoder, encode_query
class CodeSearchEngine:
"""This is a utility class which takes an nmslib
index file and a data file to return data from"""
def __init__(self, problem: str, data_dir: str, serving_url: str,
index_file: str):
self._serving_url = serving_url
self._problem = problem
self._data_dir = data_dir
self._index_file = index_file
self.index = CodeSearchEngine.nmslib_init()
self.index.loadIndex(index_file)
def embed(self, query_str):
"""This function gets the vector embedding from
the target inference server. The steps involved are
encoding the input query and decoding the responses
from the TF Serving service
TODO(sanyamkapoor): This code is still under construction
and only representative of the steps needed to build the
embedding
"""
encoder, decoder = get_encoder_decoder(self._problem, self._data_dir)
encoded_query = encode_query(encoder, query_str)
data = {"instances": [{"input": {"b64": encoded_query}}]}
response = requests.post(url=self._serving_url,
headers={'content-type': 'application/json'},
data=json.dumps(data))
result = response.json()
for prediction in result['predictions']:
prediction['outputs'] = decoder.decode(prediction['outputs'])
return result['predictions'][0]['outputs']
def query(self, query_str: str, k=2):
embedding = self.embed(query_str)
idxs, dists = self.index.knnQuery(embedding, k=k)
# TODO(sanyamkapoor): initialize data map and return
# list of dicts
# [
# {'src': self.data_map[idx], 'dist': dist}
# for idx, dist in zip(idxs, dists)
# ]
return idxs, dists
@staticmethod
def nmslib_init():
"""Initializes an nmslib index object"""
index = nmslib.init(method='hnsw', space='cosinesimil')
return index
@staticmethod
def create_index(data: np.array, save_path: str):
"""Add numpy data to the index and save to path"""
index = CodeSearchEngine.nmslib_init()
index.addDataPointBatch(data)
index.createIndex({'post': 2}, print_progress=True)
index.saveIndex(save_path)
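
A hedged usage sketch of the two halves of this class: building an index from an embedding matrix, then querying it. The file paths, matrix shape, and problem name are assumptions for illustration only:

```python
import numpy as np
from code_search.nmslib.search_engine import CodeSearchEngine

# Build an index from a (num_items, embedding_dim) matrix and save it.
embeddings = np.random.rand(1000, 128).astype(np.float32)  # stand-in data
CodeSearchEngine.create_index(embeddings, "/tmp/index.nmslib")

# Load the index and run a k-nearest-neighbor query. The problem name and
# serving URL are hypothetical; embed() round-trips through TF Serving.
engine = CodeSearchEngine(
    problem="github_function_docstring",  # hypothetical problem name
    data_dir="/tmp/t2t_data",
    serving_url="http://t2t-serving:8000/v1/models/t2t-code-search:predict",
    index_file="/tmp/index.nmslib",
)
idxs, dists = engine.query("read lines from a csv file", k=2)
```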

View File

@ -1,10 +1,11 @@
from flask import Flask, request, abort, jsonify, make_response
from code_search.nmslib.search_engine import CodeSearchEngine
class CodeSearchServer:
"""This utility class wraps the search engine into
an HTTP server based on Flask"""
def __init__(self, engine, host='0.0.0.0', port=8008):
def __init__(self, engine: CodeSearchEngine, host='0.0.0.0', port=8008):
self.app = Flask(__name__)
self.host = host
self.port = port
@ -24,7 +25,7 @@ class CodeSearchServer:
abort(make_response(
jsonify(status=400, error="empty query"), 400))
result = self.engine.search(query_str)
result = self.engine.query(query_str)
return make_response(jsonify(result=result))
def run(self):
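
A sketch of a client call against this Flask server. The route path and query-parameter name are not visible in this hunk and are assumptions; only the default host/port and the `{"result": ...}` JSON envelope come from the code above:

```python
# Hypothetical client for the Flask search server. "/query" and the "query"
# parameter name are assumptions; the port default (8008) and the JSON
# envelope {"result": ...} come from CodeSearchServer above.
import requests

resp = requests.get("http://localhost:8008/query", params={"query": "open a file"})
resp.raise_for_status()
print(resp.json()["result"])
```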

View File

@ -0,0 +1 @@
import code_search.t2t.similarity_transformer

View File

@ -0,0 +1,32 @@
import base64
import tensorflow as tf
from tensor2tensor.data_generators import text_encoder
from tensor2tensor.utils import registry
def get_encoder_decoder(problem_name, data_dir):
"""Get encoder from the T2T problem.This might
vary by problem, keeping generic as a reference
"""
problem = registry.problem(problem_name)
hparams = tf.contrib.training.HParams(data_dir=data_dir)
problem.get_hparams(hparams)
return problem.feature_info["inputs"].encoder, \
problem.feature_info["targets"].encoder
def encode_query(encoder, query_str):
"""Encode the input query string using encoder. This
might vary by problem but keeping generic as a reference.
Note that in T2T problems, the 'targets' key is needed
even though it is ignored during inference.
See tensorflow/tensor2tensor#868"""
encoded_str = encoder.encode(query_str) + [text_encoder.EOS_ID]
features = {"inputs": tf.train.Feature(int64_list=tf.train.Int64List(value=encoded_str)),
"targets": tf.train.Feature(int64_list=tf.train.Int64List(value=[0]))}
example = tf.train.Example(features=tf.train.Features(feature=features))
return base64.b64encode(example.SerializeToString()).decode('utf-8')
def decode_result(decoder, list_ids):
return decoder.decode(list_ids)
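
A sketch of how these helpers fit together; the problem name and data directory are hypothetical, and a registered T2T problem with its vocabulary under `data_dir` is assumed to exist:

```python
from code_search.t2t.query import get_encoder_decoder, encode_query

# Hypothetical problem name and data dir; get_encoder_decoder needs a
# registered T2T problem whose vocabulary lives under data_dir.
encoder, decoder = get_encoder_decoder("github_function_docstring", "/tmp/t2t_data")

# encode_query returns a base64-encoded serialized tf.train.Example, i.e.
# exactly the "b64" payload the TF Serving REST predict API expects.
b64_example = encode_query(encoder, "download a file over http")
print(b64_example[:40], "...")
```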

View File

@ -0,0 +1,8 @@
tensor2tensor~=1.6.0
tensorflow~=1.8.0
oauth2client~=4.1.0
Flask~=1.0.0
nmslib~=1.7.0
numpy~=1.14.0
google-cloud-storage~=1.10.0
requests~=2.18.0

View File

@ -3,10 +3,10 @@ from setuptools import setup, find_packages
with open('requirements.txt', 'r') as f:
install_requires = f.readlines()
VERSION = '0.1.0'
VERSION = '0.0.1'
setup(name='code-search-index-server',
description='Kubeflow Code Search Demo - Index Server',
setup(name='code-search',
description='Kubeflow Code Search Demo',
url='https://www.github.com/kubeflow/examples',
author='Sanyam Kapoor',
author_email='sanyamkapoor@google.com',
@ -17,7 +17,7 @@ setup(name='code-search-index-server',
extras_require={},
entry_points={
'console_scripts': [
'nmslib-serve=nmslib_flask.cli:server',
'nmslib-create=nmslib_flask.cli:creator',
'nmslib-serve=code_search.nmslib.cli:server',
'nmslib-create=code_search.nmslib.cli:creator',
]
})

View File

@ -1,5 +1,11 @@
#!/usr/bin/env bash
##
# This script builds the Docker image from the `app`
# directory and pushes it to Google Container Registry,
# tagging each build with a unique date-based tag.
#
set -ex
PROJECT=${PROJECT:-}
@ -17,7 +23,7 @@ BUILD_IMAGE_TAG="code-search:v$(date +%Y%m%d)$([[ ${GPU} = "1" ]] && echo '-gpu'
# Directory of this script used as docker context
_SCRIPT_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )"
pushd "$_SCRIPT_DIR"
pushd "${_SCRIPT_DIR}/app"
docker build -t ${BUILD_IMAGE_TAG} --build-arg BASE_IMAGE_TAG=${BASE_IMAGE_TAG} .

View File

@ -1,9 +0,0 @@
FROM python:3.6
ADD . /app
WORKDIR /app
RUN pip install .
ENTRYPOINT ["sh"]

View File

@ -1,21 +0,0 @@
#!/usr/bin/env bash
set -e
PROJECT=${PROJECT:-}
BUILD_IMAGE_TAG=${BUILD_IMAGE_TAG:-nmslib:devel}
# Directory of this script used as docker context
_SCRIPT_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )"
pushd "$_SCRIPT_DIR"
docker build -t ${BUILD_IMAGE_TAG} .
# Push image to GCR if PROJECT available
if [[ ! -z "${PROJECT}" ]]; then
docker tag ${BUILD_IMAGE_TAG} gcr.io/${PROJECT}/${BUILD_IMAGE_TAG}
docker push gcr.io/${PROJECT}/${BUILD_IMAGE_TAG}
fi
popd

View File

@ -1,71 +0,0 @@
import sys
import os
import argparse
import numpy as np
from nmslib_flask.gcs import maybe_download_gcs_file, maybe_upload_gcs_file
from nmslib_flask.search_engine import CodeSearchEngine
from nmslib_flask.search_server import CodeSearchServer
def parse_server_args(args):
parser = argparse.ArgumentParser(prog='nmslib Flask Server')
parser.add_argument('--index-file', type=str, required=True,
help='Path to index file created by nmslib')
parser.add_argument('--data-file', type=str, required=True,
help='Path to csv file for human-readable data')
parser.add_argument('--data-dir', type=str, metavar='', default='/tmp',
help='Path to working data directory')
parser.add_argument('--host', type=str, metavar='', default='0.0.0.0',
help='Host to start server on')
parser.add_argument('--port', type=int, metavar='', default=8008,
help='Port to bind server to')
return parser.parse_args(args)
def parse_creator_args(args):
parser = argparse.ArgumentParser(prog='nmslib Index Creator')
parser.add_argument('--data-file', type=str, required=True,
help='Path to csv data file for human-readable data')
parser.add_argument('--output-file', type=str, metavar='', default='/tmp/index.nmslib',
help='Path to output index file')
parser.add_argument('--data-dir', type=str, metavar='', default='/tmp',
help='Path to working data directory')
return parser.parse_args(args)
def server():
args = parse_server_args(sys.argv[1:])
if not os.path.isdir(args.data_dir):
os.makedirs(args.data_dir, exist_ok=True)
# Download relevant files if needed
index_file = maybe_download_gcs_file(args.index_file, args.data_dir)
data_file = maybe_download_gcs_file(args.data_file, args.data_dir)
search_engine = CodeSearchEngine(index_file, data_file)
search_server = CodeSearchServer(engine=search_engine,
host=args.host, port=args.port)
search_server.run()
def creator():
args = parse_creator_args(sys.argv[1:])
if not os.path.isdir(args.data_dir):
os.makedirs(args.data_dir, exist_ok=True)
data_file = maybe_download_gcs_file(args.data_file, args.data_dir)
# TODO(sanyamkapoor): parse data file into a numpy array
data = np.load(data_file)
tmp_output_file = os.path.join(args.data_dir, os.path.basename(args.output_file))
CodeSearchEngine.create_index(data, tmp_output_file)
maybe_upload_gcs_file(tmp_output_file, args.output_file)

View File

@ -1,46 +0,0 @@
import nmslib
import numpy as np
class CodeSearchEngine:
"""This is a utility class which takes an nmslib
index file and a data file to return data from"""
def __init__(self, index_file: str, data_file: str):
self._index_file = index_file
self._data_file = data_file
self.index = CodeSearchEngine.nmslib_init()
self.index.loadIndex(index_file)
# TODO: load the reverse-index map for actual code data
# self.data_map =
def embed(self, query_str):
# TODO load trained model and embed input strings
raise NotImplementedError
def query(self, query_str: str, k=2):
embedding = self.embed(query_str)
idxs, dists = self.index.knnQuery(embedding, k=k)
# TODO(sanyamkapoor): initialize data map and return
# list of dicts
# [
# {'src': self.data_map[idx], 'dist': dist}
# for idx, dist in zip(idxs, dists)
# ]
return idxs, dists
@staticmethod
def nmslib_init():
"""Initializes an nmslib index object"""
index = nmslib.init(method='hnsw', space='cosinesimil')
return index
@staticmethod
def create_index(data: np.array, save_path: str):
"""Add numpy data to the index and save to path"""
index = CodeSearchEngine.nmslib_init()
index.addDataPointBatch(data)
index.createIndex({'post': 2}, print_progress=True)
index.saveIndex(save_path)

View File

@ -1,4 +0,0 @@
Flask~=1.0.0
nmslib~=1.7.0
numpy~=1.14.0
google-cloud-storage~=1.10.0

View File

@ -14,6 +14,12 @@ libraries:
refSpec: master
name: tf-job
registry: kubeflow
tf-serving:
gitVersion:
commitSha: e1b2aee865866b2e7e4f8c41b34ae03b4c4bb0db
refSpec: master
name: tf-serving
registry: kubeflow
name: kubeflow
registries:
incubator:

View File

@ -0,0 +1,7 @@
local k = import "k.libsonnet";
local nms = import "nms.libsonnet";
local env = std.extVar("__ksonnet/environments");
local params = std.extVar("__ksonnet/params").components["nms-creator"];
std.prune(k.core.v1.list.new(nms.parts(params, env).creator))

View File

@ -0,0 +1,7 @@
local k = import "k.libsonnet";
local nms = import "nms.libsonnet";
local env = std.extVar("__ksonnet/environments");
local params = std.extVar("__ksonnet/params").components["nms-server"];
std.prune(k.core.v1.list.new(nms.parts(params, env).server))

View File

@ -0,0 +1,119 @@
local baseParams = std.extVar("__ksonnet/params").components["nmslib"];
{
nmsContainer(params, env):: {
apiVersion: "extensions/v1beta1",
kind: "Deployment",
metadata: {
name: params.name + "-deployment",
namespace: env.namespace,
labels: {
app: params.name,
}
},
spec: {
replicas: params.replicas,
selector: {
matchLabels: {
app: params.name,
},
},
template: {
metadata: {
labels: {
app: params.name,
}
},
spec: {
containers: [
{
name: params.name,
image: params.image,
args: params.args,
ports: [
{
containerPort: 8008,
}
],
}
],
},
},
},
},
service(params, env):: {
apiVersion: "v1",
kind: "Service",
metadata: {
labels: {
app: params.name,
},
name: params.name,
namespace: env.namespace,
annotations: {
"getambassador.io/config":
std.join("\n", [
"---",
"apiVersion: ambassador/v0",
"kind: Mapping",
"name: http-mapping-" + params.name,
"prefix: /code-search/",
"rewrite: /",
"method: GET",
"service: " + params.name + "." + env.namespace + ":8008",
]),
},
},
spec: {
type: "ClusterIP",
selector: {
app: params.name,
},
ports: [
{
name: "nmslib-serve-http",
port: 8008,
targetPort: 8008,
},
],
},
},
parts(newParams, env):: {
local params = baseParams + newParams,
creator:: {
local creatorParams = params + {
args: [
"nmslib-create",
"--data-file=" + params.dataFile,
"--index-file=" + params.indexFile,
],
},
all: [
$.nmsContainer(creatorParams, env),
],
}.all,
server:: {
local serverParams = params + {
args: [
"nmslib-serve",
"--data-file=" + params.dataFile,
"--index-file=" + params.indexFile,
"--problem=" + params.problem,
"--data-dir=" + params.dataDir,
"--serving-url=" + params.servingUrl,
],
},
all: [
$.service(serverParams, env),
$.nmsContainer(serverParams, env),
],
}.all,
}
}

View File

@ -32,6 +32,18 @@
gsOutputDir: "null",
},
"nmslib": {
name: null,
replicas: 1,
image: "gcr.io/kubeflow-dev/code-search:v20180621-266e689",
dataFile: null,
indexFile: null,
problem: null,
dataDir: null,
servingUrl: null,
},
"t2t-translate-datagen": {
jobType: "datagen",
@ -70,5 +82,22 @@
model: "transformer",
hparams_set: "transformer_base_single_gpu",
},
"t2t-translate-serving": {
name: "t2t-translate",
modelName: "t2t-translate",
modelPath: "gs://kubeflow-examples/t2t-translate/translate_ende_wmt32k/output/export/Servo",
modelServerImage: "gcr.io/kubeflow-images-public/tensorflow-serving-1.8:latest",
cloud: "gcp",
gcpCredentialSecretName: "gcp-credentials",
},
"nms-creator": {
name: "nms-creator",
},
"nms-server": {
name: "nms-server",
},
},
}

View File

@ -0,0 +1,21 @@
local env = std.extVar("__ksonnet/environments");
local params = std.extVar("__ksonnet/params").components["t2t-translate-serving"];
local k = import "k.libsonnet";
// ksonnet appears to require name be a parameter of the prototype which is why we handle it differently.
local name = params.name;
// updatedParams includes the namespace from env by default.
// We can override namespace in params if needed
local updatedParams = env + params;
local tfServingBase = import "kubeflow/tf-serving/tf-serving.libsonnet";
local tfServing = tfServingBase {
// Override parameters with user supplied parameters.
params+: updatedParams {
name: name,
},
};
std.prune(k.core.v1.list.new(tfServing.components))

View File

@ -0,0 +1,73 @@
<!-- START doctoc generated TOC please keep comment here to allow auto update -->
<!-- DON'T EDIT THIS SECTION, INSTEAD RE-RUN doctoc TO UPDATE -->
**Table of Contents** *generated with [DocToc](https://github.com/thlorenz/doctoc)*
- [tf-serving](#tf-serving)
- [Quickstart](#quickstart)
- [Using the library](#using-the-library)
- [io.ksonnet.pkg.tf-serving](#ioksonnetpkgtf-serving)
- [Example](#example)
- [Parameters](#parameters)
<!-- END doctoc generated TOC please keep comment here to allow auto update -->
# tf-serving
> TensorFlow serving is a server for TensorFlow models.
* [Quickstart](#quickstart)
* [Using Prototypes](#using-prototypes)
* [io.ksonnet.pkg.tf-serving](#io.ksonnet.pkg.tf-serving)
## Quickstart
*The following commands use the `io.ksonnet.pkg.tf-serving` prototype to generate Kubernetes YAML for tf-serving, and then deploys it to your Kubernetes cluster.*
First, create a cluster and install the ksonnet CLI (see root-level [README.md](rootReadme)).
If you haven't yet created a [ksonnet application](linkToSomewhere), do so using `ks init <app-name>`.
Finally, in the ksonnet application directory, run the following:
```shell
# Expand prototype as a Jsonnet file, place in a file in the
# `components/` directory. (YAML and JSON are also available.)
$ ks prototype use io.ksonnet.pkg.tf-serving tf-serving \
--name tf-serving \
--namespace default
# Apply to server.
$ ks apply -f tf-serving.jsonnet
```
## Using the library
The library files for tf-serving define a set of relevant *parts* (_e.g._, deployments, services, secrets, and so on) that can be combined to configure tf-serving for a wide variety of scenarios. For example, a database like Redis may need a secret to hold the user password, or it may have no password if it's acting as a cache.
This library provides a set of pre-fabricated "flavors" (or "distributions") of tf-serving, each of which is configured for a different use case. These are captured as ksonnet *prototypes*, which allow users to interactively customize these distributions for their specific needs.
These prototypes, as well as how to use them, are enumerated below.
### io.ksonnet.pkg.tf-serving
TensorFlow serving
#### Example
```shell
# Expand prototype as a Jsonnet file, place in a file in the
# `components/` directory. (YAML and JSON are also available.)
$ ks prototype use io.ksonnet.pkg.tf-serving tf-serving \
--name YOUR_NAME_HERE \
--model_path YOUR_MODEL_PATH_HERE
```
#### Parameters
The available options to pass to the prototype are:
* `--name=<name>`: Name to give to each of the components [string]
* `--model_path=<model_path>`: Path to the model. This can be a GCS path. [string]
[rootReadme]: https://github.com/ksonnet/mixins

View File

@ -0,0 +1,35 @@
{
"name": "tf-serving",
"apiVersion": "0.0.1",
"kind": "ksonnet.io/parts",
"description": "TensorFlow serving is a server for TensorFlow models.\n",
"author": "kubeflow team <kubeflow-team@google.com>",
"contributors": [
{
"name": "Jeremy Lewi",
"email": "jlewi@google.com"
}
],
"repository": {
"type": "git",
"url": "https://github.com/kubeflow/kubeflow"
},
"bugs": {
"url": "https://github.com/kubeflow/kubeflow/issues"
},
"keywords": [
"kubeflow",
"tensorflow",
"database"
],
"quickStart": {
"prototype": "io.ksonnet.pkg.tf-serving",
"componentName": "tf-serving",
"flags": {
"name": "tf-serving",
"namespace": "default"
},
"comment": "Run TensorFlow Serving"
},
"license": "Apache 2.0"
}

View File

@ -0,0 +1,24 @@
// @apiVersion 0.1
// @name io.ksonnet.pkg.tf-serving
// @description TensorFlow serving
// @shortDescription A TensorFlow serving deployment
// @param name string Name to give to each of the components
local k = import "k.libsonnet";
// ksonnet appears to require name be a parameter of the prototype which is why we handle it differently.
local name = import "param://name";
// updatedParams includes the namespace from env by default.
// We can override namespace in params if needed
local updatedParams = env + params;
local tfServingBase = import "kubeflow/tf-serving/tf-serving.libsonnet";
local tfServing = tfServingBase {
// Override parameters with user supplied parameters.
params+: updatedParams {
name: name,
},
};
std.prune(k.core.v1.list.new(tfServing.components))

View File

@ -0,0 +1,387 @@
{
util:: import "kubeflow/tf-serving/util.libsonnet",
// Parameters are intended to be late bound.
params:: {
name: null,
numGpus: 0,
labels: {
app: $.params.name,
},
modelName: $.params.name,
modelPath: null,
modelStorageType: "cloud",
version: "v1",
firstVersion: true,
deployIstio: false,
deployHttpProxy: false,
defaultHttpProxyImage: "gcr.io/kubeflow-images-public/tf-model-server-http-proxy:v20180606-9dfda4f2",
httpProxyImage: "",
httpProxyImageToUse: if $.params.httpProxyImage == "" then
$.params.defaultHttpProxyImage
else
$.params.httpProxyImage,
serviceType: "ClusterIP",
// If users want to override the image then can override defaultCpuImage and/or defaultGpuImage
// in which case the image used will still depend on whether GPUs are used or not.
// Users can also override modelServerImage in which case the user supplied value will always be used
// regardless of numGpus.
defaultCpuImage: "gcr.io/kubeflow-images-public/tensorflow-serving-1.7:v20180604-0da89b8a",
defaultGpuImage: "gcr.io/kubeflow-images-public/tensorflow-serving-1.6gpu:v20180604-0da89b8a",
modelServerImage: if $.params.numGpus == 0 then
$.params.defaultCpuImage
else
$.params.defaultGpuImage,
// Whether or not to enable s3 parameters
s3Enable:: false,
// Which cloud to use
cloud:: null,
},
// Parameters specific to GCP.
gcpParams:: {
gcpCredentialSecretName: "",
} + $.params,
// Parameters that control S3 access
// params overrides s3params because params can be overwritten by the user to override the defaults.
s3params:: {
// Name of the k8s secrets containing S3 credentials
s3SecretName: "",
// Name of the key in the k8s secret containing AWS_ACCESS_KEY_ID.
s3SecretAccesskeyidKeyName: "",
// Name of the key in the k8s secret containing AWS_SECRET_ACCESS_KEY.
s3SecretSecretaccesskeyKeyName: "",
// S3 region
s3AwsRegion: "us-west-1",
// TODO(jlewi): We should use util.toBool to automatically convert to actual boolean values.
// The use of strings is left over from when they were prototype parameters, which only support string types.
// Whether or not to use https for S3 connections
s3UseHttps: "true",
// Whether or not to verify https certificates for S3 connections
s3VerifySsl: "true",
// URL for your s3-compatible endpoint.
s3Endpoint: "http://s3.us-west-1.amazonaws.com",
} + $.params,
components:: {
all:: [
// Default routing rule for the first version of model.
if $.util.toBool($.params.deployIstio) && $.util.toBool($.params.firstVersion) then
$.parts.defaultRouteRule,
] +
// TODO(jlewi): It would be better to structure s3 as a mixin.
// As an example it would be great to allow S3 and GCS parameters
// to be enabled simultaneously. This should be doable because
// each entails adding a set of environment variables and volumes
// to the containers. These volumes/environment variables shouldn't
// overlap so there's no reason we shouldn't be able to just add
// both modifications to the base container.
// I think we want to restructure things as mixins so they can just
// be added.
if $.params.s3Enable then
[
$.s3parts.tfService,
$.s3parts.tfDeployment,
]
else if $.params.cloud == "gcp" then
[
$.gcpParts.tfService,
$.gcpParts.tfDeployment,
]
else
[
$.parts.tfService,
$.parts.tfDeployment,
],
}.all,
parts:: {
// We define the containers one level beneath parts because combined with jsonnet late binding
// this makes it easy for users to override specific bits of the container.
tfServingContainerBase:: {
name: $.params.name,
image: $.params.modelServerImage,
imagePullPolicy: "IfNotPresent",
args: [
"/usr/bin/tensorflow_model_server",
"--port=9000",
"--rest_api_port=8000",
"--model_name=" + $.params.modelName,
"--model_base_path=" + $.params.modelPath,
],
ports: [
{
containerPort: 9000,
},
{
containerPort: 8000,
},
],
// TODO(jlewi): We should add readiness and liveness probes. I think the blocker is that
// model-server doesn't have something we can use out of the box.
resources: {
requests: {
memory: "1Gi",
cpu: "1",
},
limits: {
memory: "4Gi",
cpu: "4",
},
},
// This user and group should be defined in the Docker image.
// Per best practices we don't run as the root user.
securityContext: {
runAsUser: 1000,
fsGroup: 1000,
},
volumeMounts+: if $.params.modelStorageType == "nfs" then [{
name: "nfs",
mountPath: "/mnt",
}]
else [],
}, // tfServingContainer
tfServingContainer+: $.parts.tfServingContainerBase +
if $.params.numGpus > 0 then
{
resources+: {
limits+: {
"nvidia.com/gpu": $.params.numGpus,
},
},
}
else {},
tfServingMetadata+: {
labels: $.params.labels { version: $.params.version },
annotations: {
"sidecar.istio.io/inject": if $.util.toBool($.params.deployIstio) then "true",
},
},
httpProxyContainer:: {
name: $.params.name + "-http-proxy",
image: $.params.httpProxyImageToUse,
imagePullPolicy: "IfNotPresent",
command: [
"python",
"/usr/src/app/server.py",
"--port=8000",
"--rpc_port=9000",
"--rpc_timeout=10.0",
],
env: [],
ports: [
{
containerPort: 8000,
},
],
resources: {
requests: {
memory: "1Gi",
cpu: "1",
},
limits: {
memory: "4Gi",
cpu: "4",
},
},
securityContext: {
runAsUser: 1000,
fsGroup: 1000,
},
}, // httpProxyContainer
tfDeployment: {
apiVersion: "extensions/v1beta1",
kind: "Deployment",
metadata: {
name: $.params.name + "-" + $.params.version,
namespace: $.params.namespace,
labels: $.params.labels,
},
spec: {
template: {
metadata: $.parts.tfServingMetadata,
spec: {
containers: [
$.parts.tfServingContainer,
if $.util.toBool($.params.deployHttpProxy) then
$.parts.httpProxyContainer,
],
volumes+: if $.params.modelStorageType == "nfs" then
[{
name: "nfs",
persistentVolumeClaim: {
claimName: $.params.nfsPVC,
},
}]
else [],
},
},
},
}, // tfDeployment
tfService: {
apiVersion: "v1",
kind: "Service",
metadata: {
labels: $.params.labels,
name: $.params.name,
namespace: $.params.namespace,
annotations: {
"getambassador.io/config":
std.join("\n", [
"---",
"apiVersion: ambassador/v0",
"kind: Mapping",
"name: tfserving-mapping-" + $.params.name + "-get",
"prefix: /models/" + $.params.name + "/",
"rewrite: /",
"method: GET",
"service: " + $.params.name + "." + $.params.namespace + ":8000",
"---",
"apiVersion: ambassador/v0",
"kind: Mapping",
"name: tfserving-mapping-" + $.params.name + "-post",
"prefix: /models/" + $.params.name + "/",
"rewrite: /model/" + $.params.name + ":predict",
"method: POST",
"service: " + $.params.name + "." + $.params.namespace + ":8000",
]),
}, //annotations
},
spec: {
ports: [
{
name: "grpc-tf-serving",
port: 9000,
targetPort: 9000,
},
{
name: "http-tf-serving-proxy",
port: 8000,
targetPort: 8000,
},
],
selector: $.params.labels,
type: $.params.serviceType,
},
}, // tfService
defaultRouteRule: {
apiVersion: "config.istio.io/v1alpha2",
kind: "RouteRule",
metadata: {
name: $.params.name + "-default",
namespace: $.params.namespace,
},
spec: {
destination: {
name: $.params.name,
},
precedence: 0,
route: [
{
labels: { version: $.params.version },
},
],
},
},
}, // parts
// Parts specific to S3
s3parts:: $.parts {
s3Env:: [
{ name: "AWS_ACCESS_KEY_ID", valueFrom: { secretKeyRef: { name: $.s3params.s3SecretName, key: $.s3params.s3SecretAccesskeyidKeyName } } },
{ name: "AWS_SECRET_ACCESS_KEY", valueFrom: { secretKeyRef: { name: $.s3params.s3SecretName, key: $.s3params.s3SecretSecretaccesskeyKeyName } } },
{ name: "AWS_REGION", value: $.s3params.s3AwsRegion },
{ name: "S3_REGION", value: $.s3params.s3AwsRegion },
{ name: "S3_USE_HTTPS", value: $.s3params.s3UseHttps },
{ name: "S3_VERIFY_SSL", value: $.s3params.s3VerifySsl },
{ name: "S3_ENDPOINT", value: $.s3params.s3Endpoint },
],
tfServingContainer: $.parts.tfServingContainer {
env+: $.s3parts.s3Env,
},
tfDeployment: $.parts.tfDeployment {
spec+: {
template+: {
metadata: $.parts.tfServingMetadata,
spec+: {
containers: [
$.s3parts.tfServingContainer,
if $.util.toBool($.params.deployHttpProxy) then
$.parts.httpProxyContainer,
],
},
},
},
}, // tfDeployment
}, // s3parts
// Parts specific to GCP
gcpParts:: $.parts {
gcpEnv:: [
if $.gcpParams.gcpCredentialSecretName != "" then
{ name: "GOOGLE_APPLICATION_CREDENTIALS", value: "/secret/gcp-credentials/key.json" },
],
tfServingContainer: $.parts.tfServingContainer {
env+: $.gcpParts.gcpEnv,
volumeMounts+: [
if $.gcpParams.gcpCredentialSecretName != "" then
{
name: "gcp-credentials",
mountPath: "/secret/gcp-credentials",
},
],
},
tfDeployment: $.parts.tfDeployment {
spec+: {
template+: {
metadata: $.parts.tfServingMetadata,
spec+: {
containers: [
$.gcpParts.tfServingContainer,
if $.util.toBool($.params.deployHttpProxy) then
$.parts.httpProxyContainer,
],
volumes: [
if $.gcpParams.gcpCredentialSecretName != "" then
{
name: "gcp-credentials",
secret: {
secretName: $.gcpParams.gcpCredentialSecretName,
},
},
],
},
},
},
}, // tfDeployment
}, // gcpParts
}
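
With `--rest_api_port=8000`, the model server itself answers REST predict calls, which is what lets this commit drop the custom HTTP proxy. A hedged sketch of such a call from Python; the in-cluster hostname is an assumption, while the model name `t2t-translate` comes from the params in this commit:

```python
import base64
import requests

# A real request would carry a serialized tf.train.Example here (see
# encode_query in code_search.t2t.query); this placeholder only shows shape.
b64_payload = base64.b64encode(b"serialized-tf-example").decode("utf-8")
payload = {"instances": [{"input": {"b64": b64_payload}}]}
resp = requests.post(
    "http://t2t-translate.kubeflow:8000/v1/models/t2t-translate:predict",  # assumed host
    json=payload,
)
print(resp.json().get("predictions"))
```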

View File

@ -0,0 +1,25 @@
// Some useful routines.
{
// Convert a string to upper case.
upper:: function(x) {
local cp(c) = std.codepoint(c),
local upLetter(c) = if cp(c) >= 97 && cp(c) < 123 then
std.char(cp(c) - 32)
else c,
result:: std.join("", std.map(upLetter, std.stringChars(x))),
}.result,
// Convert non-boolean types like string,number to a boolean.
// This is primarily intended for dealing with parameters that should be booleans.
toBool:: function(x) {
result::
if std.type(x) == "boolean" then
x
else if std.type(x) == "string" then
$.upper(x) == "TRUE"
else if std.type(x) == "number" then
x != 0
else
false,
}.result,
}
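
For readers less familiar with jsonnet, the same `toBool` coercion rendered in Python (an illustrative analogue, not part of this commit):

```python
# Python rendering of util.toBool: booleans pass through, strings compare
# case-insensitively against "true", numbers are truthy when non-zero,
# everything else is false. bool is checked before int since bool is a
# subclass of int in Python.
def to_bool(x):
    if isinstance(x, bool):
        return x
    if isinstance(x, str):
        return x.upper() == "TRUE"
    if isinstance(x, (int, float)):
        return x != 0
    return False

assert to_bool("True") and to_bool(1) and not to_bool(None)
```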

View File

@ -1,2 +0,0 @@
tensor2tensor~=1.6.0
oauth2client~=4.1.0

View File

@ -1,2 +0,0 @@
from . import function_summarizer
from . import docstring_lm