mirror of https://github.com/kubeflow/examples.git
Upgrade notebook commands and other relevant changes (#229)
* Replace double quotes for field values (ks convention)
* Recreate the ksonnet application from scratch
* Fix pip commands to find requirements and redo installation, fix ks param set
* Use sed replace instead of ks param set.
* Add cells to first show JobSpec and then apply
* Upgrade T2T, fix conflicting problem types
* Update docker images
* Reduce to 200k samples for vocab
* Use Jupyter notebook service account
* Add illustrative gsutil commands to show output files, specify index files glob explicitly
* List files after index creation step
* Use the model in current repository and not upstream t2t
* Update Docker images
* Expose TF Serving Rest API at 9001
* Spawn terminal from the notebooks ui, no need to go to lab
parent 0843cdad66
commit f9873e6ac4
@@ -37,17 +37,15 @@ you replace this with the true FQDN of your Kubeflow cluster in any subsequent i
 * In the **Image** text field, enter `gcr.io/kubeflow-images-public/kubeflow-codelab-notebook:v20180808-v0.2-22-gcfdcb12`.
 This image contains all the pre-requisites needed for the demo.
 
-* Once spawned, you should be redirected to the notebooks UI. We intend to go to the JupyterLab home
+* Once spawned, you should be redirected to the Jupyter Notebooks UI.
-page which is available at the URL - **https://kubeflow.example.com/user/<ACCOUNT_NAME>/lab**.
-**TIP**: Simply point the browser to **/lab** instead of the **/tree** path in the URL.
 
 * Spawn a new Terminal and run
 ```
 $ git clone --branch=master --depth=1 https://github.com/kubeflow/examples
 ```
 This will create an examples folder. It is safe to close the terminal now.
 
-* Refresh the File Explorer (typically to the left) and navigate to `examples/code_search`. Open
+* Navigate back to the Jupyter Notebooks UI and navigate to `examples/code_search`. Open
 the Jupyter notebook `code-search.ipynb` and follow it along.
 
 # Acknowledgements
@@ -21,7 +21,14 @@
 "source": [
 "## Install dependencies\n",
 "\n",
-"Let us install all the Python dependencies. Note that everything must be done with `Python 2`. This will take a while and only needs to be run once."
+"Let us install all the Python dependencies. Note that everything must be done with `Python 2`. This will take a while the first time."
+]
+},
+{
+"cell_type": "markdown",
+"metadata": {},
+"source": [
+"### Verify Version Information"
 ]
 },
 {
@@ -30,9 +37,30 @@
 "metadata": {},
 "outputs": [],
 "source": [
-"! pip2 install https://github.com/kubeflow/batch-predict/tarball/master\n",
+"%%bash\n",
 "\n",
-"! pip2 install -r src/requirements.txt"
+"echo \"Pip Version Info: \" && python2 --version && python2 -m pip --version && echo\n",
+"echo \"Google Cloud SDK Info: \" && gcloud --version && echo\n",
+"echo \"Ksonnet Version Info: \" && ks version && echo\n",
+"echo \"Kubectl Version Info: \" && kubectl version"
+]
+},
+{
+"cell_type": "markdown",
+"metadata": {},
+"source": [
+"### Install Pip Packages"
+]
+},
+{
+"cell_type": "code",
+"execution_count": null,
+"metadata": {
+"scrolled": true
+},
+"outputs": [],
+"source": [
+"! python2 -m pip install -U pip"
 ]
 },
 {
@@ -41,8 +69,9 @@
 "metadata": {},
 "outputs": [],
 "source": [
-"# Only for BigQuery cells\n",
+"# Code Search dependencies\n",
-"! pip2 install pandas-gbq"
+"! python2 -m pip install --user https://github.com/kubeflow/batch-predict/tarball/master\n",
+"! python2 -m pip install --user -r src/requirements.txt"
 ]
 },
 {
@@ -51,6 +80,17 @@
 "metadata": {},
 "outputs": [],
 "source": [
+"# BigQuery Cell Dependencies\n",
+"! python2 -m pip install --user pandas-gbq"
+]
+},
+{
+"cell_type": "code",
+"execution_count": null,
+"metadata": {},
+"outputs": [],
+"source": [
+"# NOTE: The RuntimeWarnings (if any) are harmless. See ContinuumIO/anaconda-issues#6678.\n",
 "from pandas.io import gbq"
 ]
 },
@@ -72,10 +112,8 @@
 "# Configuration Variables. Modify as desired.\n",
 "\n",
 "PROJECT = 'kubeflow-dev'\n",
-"CLUSTER_NAME = 'kubeflow-latest'\n",
-"CLUSTER_REGION = 'us-east1-d'\n",
-"CLUSTER_NAMESPACE = 'kubeflow-latest'\n",
 "\n",
+"# Dataflow Related Variables.\n",
 "TARGET_DATASET = 'code_search'\n",
 "WORKING_DIR = 'gs://kubeflow-examples/t2t-code-search/notebook-demo'\n",
 "WORKER_MACHINE_TYPE = 'n1-highcpu-32'\n",
@@ -83,10 +121,6 @@
 "\n",
 "# DO NOT MODIFY. These are environment variables to be used in a bash shell.\n",
 "%env PROJECT $PROJECT\n",
-"%env CLUSTER_NAME $CLUSTER_NAME\n",
-"%env CLUSTER_REGION $CLUSTER_REGION\n",
-"%env CLUSTER_NAMESPACE $CLUSTER_NAMESPACE\n",
-"\n",
 "%env TARGET_DATASET $TARGET_DATASET\n",
 "%env WORKING_DIR $WORKING_DIR\n",
 "%env WORKER_MACHINE_TYPE $WORKER_MACHINE_TYPE\n",
@@ -99,7 +133,7 @@
 "source": [
 "### Setup Authorization\n",
 "\n",
-"In a Kubeflow cluster, we already have the key credentials available with each pod and will re-use them to authenticate. This will allow us to submit `TFJob`s and execute `Dataflow` pipelines. We also set the new context for the Code Search Ksonnet application."
+"In a Kubeflow cluster on GKE, we already have the Google Application Credentials mounted onto each Pod. We can simply point `gcloud` to activate that service account."
 ]
 },
 {
@@ -111,13 +145,35 @@
 "%%bash\n",
 "\n",
 "# Activate Service Account provided by Kubeflow.\n",
-"gcloud auth activate-service-account --key-file=${GOOGLE_APPLICATION_CREDENTIALS}\n",
+"gcloud auth activate-service-account --key-file=${GOOGLE_APPLICATION_CREDENTIALS}"
+]
+},
+{
+"cell_type": "markdown",
+"metadata": {},
+"source": [
+"Additionally, to interact with the underlying cluster, we configure `kubectl`."
+]
+},
+{
+"cell_type": "code",
+"execution_count": null,
+"metadata": {},
+"outputs": [],
+"source": [
+"%%bash\n",
 "\n",
-"# Get KUBECONFIG for the desired cluster.\n",
-"gcloud container clusters get-credentials ${CLUSTER_NAME} --region ${CLUSTER_REGION}\n",
-"\n",
-"# Set the namespace of the context.\n",
-"kubectl config set contexts.$(kubectl config current-context).namespace ${CLUSTER_NAMESPACE}"
+"kubectl config set-cluster kubeflow --server=https://kubernetes.default --certificate-authority=/var/run/secrets/kubernetes.io/serviceaccount/ca.crt\n",
+"kubectl config set-credentials jupyter --token \"$(cat /var/run/secrets/kubernetes.io/serviceaccount/token)\"\n",
+"kubectl config set-context kubeflow --cluster kubeflow --user jupyter\n",
+"kubectl config use-context kubeflow"
+]
+},
+{
+"cell_type": "markdown",
+"metadata": {},
+"source": [
+"Collectively, these allow us to interact with Google Cloud Services as well as the Kubernetes Cluster directly to submit `TFJob`s and execute `Dataflow` pipelines."
 ]
 },
 {
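The new cells above build a `kubectl` context from the Pod's mounted service-account token instead of calling `gcloud container clusters get-credentials`. If you prefer to talk to the cluster from Python rather than shelling out, the official Kubernetes client can consume the same in-cluster credentials. This sketch is editorial (not part of the commit) and assumes the `kubernetes` package is installed in the notebook image.

```python
# Sketch only: use the Pod's service-account credentials directly from Python,
# the same token and CA cert that the kubectl commands above configure.
# Assumes `pip install kubernetes` inside the notebook image.
from kubernetes import client, config

config.load_incluster_config()  # reads /var/run/secrets/kubernetes.io/serviceaccount
core_v1 = client.CoreV1Api()

# For example, list Pods in the kubeflow namespace to confirm access.
for pod in core_v1.list_namespaced_pod(namespace="kubeflow").items:
    print(pod.metadata.name)
```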
@@ -126,7 +182,7 @@
 "source": [
 "### Setup Ksonnet Application\n",
 "\n",
-"This will use the context we've set above and provide it as a new environment to the Ksonnet application."
+"We now point the Ksonnet application to the underlying Kubernetes cluster."
 ]
 },
 {
@@ -139,32 +195,15 @@
 "\n",
 "cd kubeflow\n",
 "\n",
-"# Update Ksonnet application to the context set earlier\n",
+"# Update Ksonnet to point to the Kubernetes Cluster\n",
-"ks env add code-search --context=$(kubectl config current-context)\n",
+"ks env add code-search --context $(kubectl config current-context)\n",
 "\n",
 "# Update the Working Directory of the application\n",
-"ks param set t2t-code-search workingDir ${WORKING_DIR}"
-]
-},
-{
-"cell_type": "markdown",
-"metadata": {},
-"source": [
-"### Verify Version Information"
-]
-},
-{
-"cell_type": "code",
-"execution_count": null,
-"metadata": {},
-"outputs": [],
-"source": [
-"%%bash\n",
+"sed -i'' \"s,gs://example/prefix,${WORKING_DIR},\" components/params.libsonnet\n",
 "\n",
-"echo \"Pip Version Info: \" && pip2 --version && echo\n",
-"echo \"Google Cloud SDK Info: \" && gcloud --version && echo\n",
-"echo \"Ksonnet Version Info: \" && ks version && echo\n",
-"echo \"Kubectl Version Info: \" && kubectl version"
+"# FIXME(sanyamkapoor): This command completely replaces previous configurations.\n",
+"# Hence, using string replacement in file.\n",
+"# ks param set t2t-code-search workingDir ${WORKING_DIR}"
 ]
 },
 {
@@ -173,7 +212,9 @@
 "source": [
 "## View Github Files\n",
 "\n",
-"This is the query that is run as the first step of the Pre-Processing pipeline and is sent through a set of transformations. This is illustrative of the rows being processed in the pipeline we trigger next."
+"This is the query that is run as the first step of the Pre-Processing pipeline and is sent through a set of transformations. This is illustrative of the rows being processed in the pipeline we trigger next.\n",
+"\n",
+"**WARNING**: The table is large and the query can take a few minutes to complete."
 ]
 },
 {
@@ -294,7 +335,9 @@
 {
 "cell_type": "code",
 "execution_count": null,
-"metadata": {},
+"metadata": {
+"scrolled": false
+},
 "outputs": [],
 "source": [
 "query = \"\"\"\n",
@@ -308,13 +351,48 @@
 "gbq.read_gbq(query, dialect='standard', project_id=PROJECT)"
 ]
 },
+{
+"cell_type": "markdown",
+"metadata": {},
+"source": [
+"This pipeline also writes a set of CSV files which contain function and docstring pairs delimited by a comma. Here, we list a subset of them."
+]
+},
+{
+"cell_type": "code",
+"execution_count": null,
+"metadata": {},
+"outputs": [],
+"source": [
+"%%bash\n",
+"\n",
+"LIMIT=10\n",
+"\n",
+"gsutil ls ${WORKING_DIR}/data/*.csv | head -n ${LIMIT}"
+]
+},
 {
 "cell_type": "markdown",
 "metadata": {},
 "source": [
 "## Prepare Dataset for Training\n",
 "\n",
-"In this step we will use `t2t-datagen` to convert the transformed data above into the `TFRecord` format. We will run this job on the Kubeflow cluster."
+"We will use `t2t-datagen` to convert the transformed data above into the `TFRecord` format.\n",
+"\n",
+"**TIP**: Use `ks show` to view the Resource Spec submitted."
+]
+},
+{
+"cell_type": "code",
+"execution_count": null,
+"metadata": {},
+"outputs": [],
+"source": [
+"%%bash\n",
+"\n",
+"cd kubeflow\n",
+"\n",
+"ks show code-search -c t2t-code-search-datagen"
 ]
 },
 {
@@ -334,7 +412,43 @@
 "cell_type": "markdown",
 "metadata": {},
 "source": [
-"## Execute Tensorflow Training"
+"Once this job finishes, the data directory should have a vocabulary file and a list of `TFRecords` prefixed by the problem name which in our case is `github_function_docstring_extended`. Here, we list a subset of them."
+]
+},
+{
+"cell_type": "code",
+"execution_count": null,
+"metadata": {},
+"outputs": [],
+"source": [
+"%%bash\n",
+"\n",
+"LIMIT=10\n",
+"\n",
+"gsutil ls ${WORKING_DIR}/data/vocab*\n",
+"gsutil ls ${WORKING_DIR}/data/*train* | head -n ${LIMIT}"
+]
+},
+{
+"cell_type": "markdown",
+"metadata": {},
+"source": [
+"## Execute Tensorflow Training\n",
+"\n",
+"Once, the `TFRecords` are generated, we will use `t2t-trainer` to execute the training."
+]
+},
+{
+"cell_type": "code",
+"execution_count": null,
+"metadata": {},
+"outputs": [],
+"source": [
+"%%bash\n",
+"\n",
+"cd kubeflow\n",
+"\n",
+"ks show code-search -c t2t-code-search-trainer"
 ]
 },
 {
@@ -354,7 +468,40 @@
 "cell_type": "markdown",
 "metadata": {},
 "source": [
-"## Export Tensorflow Model"
+"This will generate TensorFlow model checkpoints which is illustrated below."
+]
+},
+{
+"cell_type": "code",
+"execution_count": null,
+"metadata": {},
+"outputs": [],
+"source": [
+"%%bash\n",
+"\n",
+"gsutil ls ${WORKING_DIR}/output/*ckpt*"
+]
+},
+{
+"cell_type": "markdown",
+"metadata": {},
+"source": [
+"## Export Tensorflow Model\n",
+"\n",
+"We now use `t2t-exporter` to export the `TFModel`."
+]
+},
+{
+"cell_type": "code",
+"execution_count": null,
+"metadata": {},
+"outputs": [],
+"source": [
+"%%bash\n",
+"\n",
+"cd kubeflow\n",
+"\n",
+"ks show code-search -c t2t-code-search-exporter"
 ]
 },
 {
@@ -370,6 +517,24 @@
 "ks apply code-search -c t2t-code-search-exporter"
 ]
 },
+{
+"cell_type": "markdown",
+"metadata": {},
+"source": [
+"Once completed, this will generate a TensorFlow `SavedModel` which we will further use for both online (via `TF Serving`) and offline inference (via `Kubeflow Batch Prediction`)."
+]
+},
+{
+"cell_type": "code",
+"execution_count": null,
+"metadata": {},
+"outputs": [],
+"source": [
+"%%bash\n",
+"\n",
+"gsutil ls ${WORKING_DIR}/output/export/Servo"
+]
+},
 {
 "cell_type": "markdown",
 "metadata": {},
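The exported `SavedModel` listed under `${WORKING_DIR}/output/export/Servo/<timestamp>` can also be inspected outside of TF Serving. The snippet below is an editorial sketch (not part of this commit) using the TensorFlow 1.x `saved_model` loader; the export path and timestamp are assumptions based on the `gsutil ls` output above.

```python
# Sketch only: load the exported SavedModel with the TF 1.x loader.
# Assumes TensorFlow ~1.8 and an export directory such as
# .../output/export/Servo/1533685294 (hypothetical timestamp).
import tensorflow as tf

export_dir = ("gs://kubeflow-examples/t2t-code-search/notebook-demo/"
              "output/export/Servo/1533685294")

with tf.Session(graph=tf.Graph()) as sess:
    # Load the graph and variables tagged for serving.
    meta_graph = tf.saved_model.loader.load(
        sess, [tf.saved_model.tag_constants.SERVING], export_dir)
    # List the available serving signatures.
    print(list(meta_graph.signature_def.keys()))
```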
@@ -398,7 +563,7 @@
 "source": [
 "### Configuration\n",
 "\n",
-"First, select a Exported Model version from the `${WORKING_DIR}/output/export/Servo`. This should be name of a folder with UNIX Seconds Timestamp like `1533685294`. Below, we automatically do that by selecting the folder which represents the latest timestamp."
+"First, select a Exported Model version from the `${WORKING_DIR}/output/export/Servo` as seen above. This should be name of a folder with UNIX Seconds Timestamp like `1533685294`. Below, we automatically do that by selecting the folder which represents the latest timestamp."
 ]
 },
 {
@@ -442,14 +607,17 @@
 "\n",
 "cd src\n",
 "\n",
+"JOB_NAME=\"compute-function-embeddings-$(date +'%Y%m%d-%H%M%S')\"\n",
+"PROBLEM=github_function_docstring_extended\n",
+"\n",
 "python2 -m code_search.dataflow.cli.create_function_embeddings \\\n",
-" --runner DataflowRunner\n",
+" --runner DataflowRunner \\\n",
 " --project \"${PROJECT}\" \\\n",
 " --target_dataset \"${TARGET_DATASET}\" \\\n",
-" --problem github_function_docstring \\\n",
+" --problem \"${PROBLEM}\" \\\n",
 " --data_dir \"${WORKING_DIR}/data\" \\\n",
 " --saved_model_dir \"${WORKING_DIR}/output/export/Servo/${MODEL_VERSION}\" \\\n",
-" --job_name compute-function-embeddings\n",
+" --job_name \"${JOB_NAME}\" \\\n",
 " --temp_location \"${WORKING_DIR}/dataflow/temp\" \\\n",
 " --staging_location \"${WORKING_DIR}/dataflow/staging\" \\\n",
 " --worker_machine_type \"${WORKER_MACHINE_TYPE}\" \\\n",
@@ -480,13 +648,46 @@
 "gbq.read_gbq(query, dialect='standard', project_id=PROJECT)"
 ]
 },
+{
+"cell_type": "markdown",
+"metadata": {},
+"source": [
+"The pipeline also generates a set of CSV files which will be useful to generate the search index."
+]
+},
+{
+"cell_type": "code",
+"execution_count": null,
+"metadata": {},
+"outputs": [],
+"source": [
+"%%bash\n",
+"\n",
+"LIMIT=10\n",
+"\n",
+"gsutil ls ${WORKING_DIR}/data/*index*.csv | head -n ${LIMIT}"
+]
+},
 {
 "cell_type": "markdown",
 "metadata": {},
 "source": [
 "## Create Search Index\n",
 "\n",
-"We now create the Search Index from the computed embeddings so that during a query we can do a k-Nearest Neighbor search to give out semantically similar results."
+"We now create the Search Index from the computed embeddings. This facilitates k-Nearest Neighbor search to for semantically similar results."
+]
+},
+{
+"cell_type": "code",
+"execution_count": null,
+"metadata": {},
+"outputs": [],
+"source": [
+"%%bash\n",
+"\n",
+"cd kubeflow\n",
+"\n",
+"ks show code-search -c search-index-creator"
 ]
 },
 {
@@ -506,7 +707,18 @@
 "cell_type": "markdown",
 "metadata": {},
 "source": [
-"Using the CSV files generated from the previous step, this creates an index using [NMSLib](https://github.com/nmslib/nmslib). A unified CSV file containing all the code examples for a human-readable reverse lookup during the query, is also created in the `WORKING_DIR`."
+"Using the CSV files generated from the previous step, this creates an index using [NMSLib](https://github.com/nmslib/nmslib). A unified CSV file containing all the code examples for a human-readable reverse lookup during the query, is also created."
+]
+},
+{
+"cell_type": "code",
+"execution_count": null,
+"metadata": {},
+"outputs": [],
+"source": [
+"%%bash\n",
+"\n",
+"gsutil ls ${WORKING_DIR}/code_search_index*"
 ]
 },
 {
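For readers unfamiliar with NMSLib, the `search-index-creator` job is essentially performing the steps sketched below. This is an editorial illustration, not code from this commit; the embedding array, dimensions, and file name are assumptions.

```python
# Sketch only: build and query an NMSLib k-NN index over function embeddings.
# Assumes `pip install nmslib numpy`; the embeddings here are placeholders.
import numpy as np
import nmslib

embeddings = np.random.rand(1000, 128).astype(np.float32)  # stand-in for real function embeddings

index = nmslib.init(method='hnsw', space='cosinesimil')
index.addDataPointBatch(embeddings)
index.createIndex({'post': 2}, print_progress=True)
index.saveIndex('code_search_index.nmslib')

# At query time, embed the query string with the served model and search.
query_embedding = np.random.rand(128).astype(np.float32)
ids, distances = index.knnQuery(query_embedding, k=5)
print(ids, distances)
```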
@@ -518,6 +730,19 @@
 "We've seen offline inference during the computation of embeddings. For online inference, we deploy the exported Tensorflow model above using [Tensorflow Serving](https://www.tensorflow.org/serving/)."
 ]
 },
+{
+"cell_type": "code",
+"execution_count": null,
+"metadata": {},
+"outputs": [],
+"source": [
+"%%bash\n",
+"\n",
+"cd kubeflow\n",
+"\n",
+"ks show code-search -c t2t-code-search-serving"
+]
+},
 {
 "cell_type": "code",
 "execution_count": null,
@@ -540,6 +765,19 @@
 "We finally deploy the Search UI which allows the user to input arbitrary strings and see a list of results corresponding to semantically similar Python functions. This internally uses the inference server we just deployed."
 ]
 },
+{
+"cell_type": "code",
+"execution_count": null,
+"metadata": {},
+"outputs": [],
+"source": [
+"%%bash\n",
+"\n",
+"cd kubeflow\n",
+"\n",
+"ks show code-search -c search-index-server"
+]
+},
 {
 "cell_type": "code",
 "execution_count": null,
@@ -2,7 +2,7 @@ ARG BASE_IMAGE_TAG=1.8.0
 
 FROM tensorflow/tensorflow:$BASE_IMAGE_TAG
 
-RUN pip --no-cache-dir install tensor2tensor~=1.6.0 oauth2client~=4.1.0 &&\
+RUN pip --no-cache-dir install tensor2tensor~=1.7.0 oauth2client~=4.1.0 &&\
 apt-get update && apt-get install -y jq &&\
 rm -rf /var/lib/apt/lists/*
 
@@ -15,6 +15,4 @@ ENV PYTHONIOENCODING=utf-8 T2T_USR_DIR=/app/code_search/t2t
 
 VOLUME ["/data", "/output"]
 
-EXPOSE 8008
-
 ENTRYPOINT ["bash"]
@@ -9,7 +9,7 @@ RUN curl -sL https://deb.nodesource.com/setup_10.x | bash - &&\
 numpy~=1.14.0 \
 oauth2client~=4.1.0 \
 requests~=2.18.0 \
-tensor2tensor~=1.6.0 &&\
+tensor2tensor~=1.7.0 &&\
 rm -rf /var/lib/apt/lists/*
 
 ADD src/ /src
@@ -1,17 +1,17 @@
 apiVersion: 0.2.0
 environments:
-kf-cs:
+default:
 destination:
 namespace: kubeflow
-server: https://35.232.164.190
+server: https://35.237.202.148
-k8sVersion: v1.9.6
+k8sVersion: v1.9.7
-path: kf-cs
+path: default
 kind: ksonnet.io/app
 libraries:
 tf-serving:
 name: tf-serving
 registry: kubeflow
-version: e95f94a1a97a0974ada734895d590b5ba565fa77
+version: ab6084349673e6405ae486eb3be2141e3550643c
 name: kubeflow
 registries:
 incubator:
@@ -19,5 +19,5 @@ registries:
 uri: github.com/ksonnet/parts/tree/master/incubator
 kubeflow:
 protocol: github
-uri: https://github.com/kubeflow/kubeflow/tree/v0.2.2/kubeflow
+uri: https://github.com/kubeflow/kubeflow/tree/master/kubeflow
 version: 0.0.1
@@ -9,8 +9,8 @@
 numPsGpu: 0,
 train_steps: 100,
 eval_steps: 10,
-image: 'gcr.io/kubeflow-dev/code-search:v20180814-66d27b9',
+image: 'gcr.io/kubeflow-dev/code-search:v20180817-732333a',
-imageGpu: 'gcr.io/kubeflow-dev/code-search:v20180814-66d27b9-gpu',
+imageGpu: 'gcr.io/kubeflow-dev/code-search:v20180817-732333a-gpu',
 imagePullSecrets: [],
 dataDir: 'null',
 outputDir: 'null',
@@ -18,34 +18,35 @@
 hparams_set: 'null',
 },
 "t2t-code-search": {
+name: 't2t-code-search',
 workingDir: 'gs://example/prefix',
-problem: 'github_function_docstring_extended',
+problem: 'cs_github_function_docstring',
-model: 'similarity_transformer',
+model: 'cs_similarity_transformer',
 hparams_set: 'transformer_tiny',
 },
 "t2t-code-search-datagen": {
 jobType: 'datagen',
 name: 't2t-code-search-datagen',
-problem: $.components["t2t-code-search"].problem,
+problem: $.components['t2t-code-search'].problem,
 dataDir: $.components['t2t-code-search'].workingDir + '/data',
 },
 "t2t-code-search-trainer": {
 jobType: 'trainer',
 name: 't2t-code-search-trainer',
-problem: $.components["t2t-code-search"].problem,
+problem: $.components['t2t-code-search'].problem,
 dataDir: $.components['t2t-code-search'].workingDir + '/data',
 outputDir: $.components['t2t-code-search'].workingDir + '/output',
-model: $.components["t2t-code-search"].model,
+model: $.components['t2t-code-search'].model,
-hparams_set: $.components["t2t-code-search"].hparams_set,
+hparams_set: $.components['t2t-code-search']['hparams_set'],
 },
 "t2t-code-search-exporter": {
 jobType: 'exporter',
 name: 't2t-code-search-exporter',
-problem: $.components["t2t-code-search"].problem,
+problem: $.components['t2t-code-search'].problem,
 dataDir: $.components['t2t-code-search'].workingDir + '/data',
 outputDir: $.components['t2t-code-search'].workingDir + '/output',
-model: $.components["t2t-code-search"].model,
+model: $.components['t2t-code-search'].model,
-hparams_set: $.components["t2t-code-search"].hparams_set,
+hparams_set: $.components['t2t-code-search']['hparams_set'],
 },
 "t2t-code-search-serving": {
 name: 't2t-code-search',
@@ -57,7 +58,7 @@
 },
 nmslib: {
 replicas: 1,
-image: 'gcr.io/kubeflow-dev/code-search-ui:v20180806-7b0fcaa',
+image: 'gcr.io/kubeflow-dev/code-search-ui:v20180817-0d4a60d',
 problem: 'null',
 dataDir: 'null',
 lookupFile: 'null',
@@ -72,11 +73,11 @@
 },
 "search-index-server": {
 name: 'search-index-server',
-problem: $.components["t2t-code-search"].problem,
+problem: $.components['t2t-code-search'].problem,
 dataDir: $.components['t2t-code-search'].workingDir + '/data',
 lookupFile: $.components['t2t-code-search'].workingDir + '/code_search_index.csv',
 indexFile: $.components['t2t-code-search'].workingDir + '/code_search_index.nmslib',
-servingUrl: 'http://t2t-code-search.kubeflow:8000/v1/models/t2t-code-search:predict',
+servingUrl: 'http://t2t-code-search.kubeflow:9001/v1/models/t2t-code-search:predict',
 },
 },
 }
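The `servingUrl` above now targets the TF Serving REST API exposed on port 9001. A quick way to sanity-check that endpoint from inside the cluster is a plain HTTP request; the sketch below is editorial only (not part of this commit), and the instance payload shape is an assumption about what the exported model expects.

```python
# Sketch only: call the TF Serving REST predict endpoint used by search-index-server.
# Assumes the `requests` package and in-cluster DNS; the payload shape is hypothetical.
import requests

SERVING_URL = "http://t2t-code-search.kubeflow:9001/v1/models/t2t-code-search:predict"

# TF Serving's REST API expects a JSON body with an "instances" list.
payload = {"instances": [{"input": {"b64": "aGVsbG8gd29ybGQ="}}]}  # hypothetical encoded query

response = requests.post(SERVING_URL, json=payload, timeout=10)
response.raise_for_status()
print(response.json())  # {"predictions": [...]}
```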
@@ -0,0 +1,230 @@
+// @apiVersion 0.1
+// @name io.ksonnet.pkg.tf-serving-request-log
+// @description tf-serving with request logging
+// @shortDescription tf-serving with request logging
+// @param name string Name to give to each of the components
+// @param gcpProject string The gcp project for Bigquery dataset
+// @param dataset string The Bigquery dataset
+// @param table string The Bigquery table
+// @optionalParam modelBasePath string gs://kubeflow-examples-data/mnist The model path
+// @optionalParam modelName string mnist The model name
+
+local k = import "k.libsonnet";
+
+local namespace = "kubeflow";
+local appName = import "param://name";
+local image = "gcr.io/kubeflow-images-public/tf-model-server-cpu:v20180327-995786ec";
+local httpProxyImage = "gcr.io/kubeflow-images-public/tf-model-server-http-proxy:v20180723";
+local loggingImage = "gcr.io/kubeflow-images-public/tf-model-server-request-logger:v20180723";
+
+local gcpSecretName = "user-gcp-sa";
+
+local service = {
+apiVersion: "v1",
+kind: "Service",
+metadata: {
+labels: {
+app: appName,
+},
+name: appName,
+namespace: namespace,
+},
+spec: {
+ports: [
+{
+name: "grpc-tf-serving",
+port: 9000,
+targetPort: 9000,
+},
+{
+name: "http-tf-serving-proxy",
+port: 8000,
+targetPort: 8000,
+},
+],
+selector: {
+app: appName,
+},
+type: "ClusterIP",
+},
+};
+
+local configMap = {
+apiVersion: "v1",
+kind: "ConfigMap",
+metadata: {
+name: appName + "fluentd-config",
+namespace: namespace,
+},
+data: {
+"fluent.conf": std.format(|||
+<source>
+@type tail
+path /tmp/logs/request.log
+pos_file /tmp/logs/request.log.pos
+<parse>
+@type json
+</parse>
+tag dummy
+</source>
+<match dummy>
+@type bigquery_insert
+auth_method application_default
+project %s
+dataset %s
+table %s
+fetch_schema true
+</match>
+|||, [params.gcpProject, params.dataset, params.table]),
+},
+};
+
+local deployment = {
+apiVersion: "extensions/v1beta1",
+kind: "Deployment",
+metadata: {
+labels: {
+app: appName,
+},
+name: appName,
+namespace: namespace,
+},
+spec: {
+template: {
+metadata: {
+labels: {
+app: appName,
+},
+},
+spec: {
+containers: [
+// ModelServer
+{
+args: [
+"/usr/bin/tensorflow_model_server",
+"--port=9000",
+"--model_name=" + params.modelName,
+"--model_base_path=" + params.modelBasePath,
+],
+image: image,
+imagePullPolicy: "IfNotPresent",
+name: "model-server",
+ports: [
+{
+containerPort: 9000,
+},
+],
+resources: {
+limits: {
+cpu: "4",
+memory: "4Gi",
+},
+requests: {
+cpu: "1",
+memory: "1Gi",
+},
+},
+},
+// Http proxy
+{
+name: "http-proxy",
+image: httpProxyImage,
+imagePullPolicy: "Always",
+command: [
+"python",
+"/usr/src/app/server.py",
+"--port=8000",
+"--rpc_port=9000",
+"--rpc_timeout=10.0",
+"--log_request=true",
+],
+env: [],
+ports: [
+{
+containerPort: 8000,
+},
+],
+resources: {
+requests: {
+memory: "1Gi",
+cpu: "1",
+},
+limits: {
+memory: "4Gi",
+cpu: "4",
+},
+},
+securityContext: {
+runAsUser: 1000,
+fsGroup: 1000,
+},
+volumeMounts: [
+{
+name: "request-logs",
+mountPath: "/tmp/logs",
+},
+],
+},
+// TODO(lunkai): use admission controller to inject.
+// Logging container.
+{
+name: "logging",
+image: loggingImage,
+imagePullPolicy: "Always",
+env: [
+{ name: "GOOGLE_APPLICATION_CREDENTIALS", value: "/secret/gcp-credentials/key.json" },
+],
+resources: {
+requests: {
+memory: "250Mi",
+cpu: "0.25",
+},
+limits: {
+memory: "500Mi",
+cpu: "0.5",
+},
+},
+volumeMounts: [
+{
+name: "request-logs",
+mountPath: "/tmp/logs",
+},
+{
+name: "gcp-credentials",
+mountPath: "/secret/gcp-credentials",
+},
+{
+name: "fluentd-config-volume",
+mountPath: "/fluentd/etc/custom",
+},
+],
+},
+],
+volumes: [
+{
+name: "gcp-credentials",
+secret: {
+secretName: gcpSecretName,
+},
+},
+{
+name: "request-logs",
+emptyDir: {},
+},
+{
+configMap: {
+name: "fluentd-config",
+},
+name: "fluentd-config-volume",
+},
+],
+},
+},
+},
+};
+
+k.core.v1.list.new([
+service,
+deployment,
+configMap,
+])
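The new `tf-serving-request-log` prototype above wires a fluentd sidecar that tails `/tmp/logs/request.log` and inserts each JSON record into BigQuery. The sketch below is editorial and not part of the commit; it only shows the kind of newline-delimited JSON that such a tail/parse-json source would pick up, and the field names are assumptions rather than the actual request-logger schema.

```python
# Sketch only: append one newline-delimited JSON record to the file that the
# fluentd sidecar tails (@type tail + @type json in fluent.conf above).
# Field names here are hypothetical, not the real request-logger schema.
import json
import time

record = {
    "timestamp": time.time(),
    "model_name": "mnist",                     # matches the modelName default above
    "request": {"instances": [[0.0] * 784]},   # placeholder payload
}

with open("/tmp/logs/request.log", "a") as log_file:
    log_file.write(json.dumps(record) + "\n")
```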
@@ -18,12 +18,7 @@
 deployIstio: false,
 
 deployHttpProxy: false,
-defaultHttpProxyImage: "gcr.io/kubeflow-images-public/tf-model-server-http-proxy:v20180606-9dfda4f2",
+httpProxyImage: "gcr.io/kubeflow-images-public/tf-model-server-http-proxy:v20180606-9dfda4f2",
-httpProxyImage: "",
-httpProxyImageToUse: if $.params.httpProxyImage == "" then
-$.params.defaultHttpProxyImage
-else
-$.params.httpProxyImage,
 
 serviceType: "ClusterIP",
@@ -57,10 +52,10 @@
 // Name of the k8s secrets containing S3 credentials
 s3SecretName: "",
 // Name of the key in the k8s secret containing AWS_ACCESS_KEY_ID.
-s3SecretAccesskeyidKeyName: "",
+s3SecretAccesskeyidKeyName: "AWS_ACCESS_KEY_ID",
 
 // Name of the key in the k8s secret containing AWS_SECRET_ACCESS_KEY.
-s3SecretSecretaccesskeyKeyName: "",
+s3SecretSecretaccesskeyKeyName: "AWS_SECRET_ACCESS_KEY",
 
 // S3 region
 s3AwsRegion: "us-west-1",
@@ -122,7 +117,7 @@
 args: [
 "/usr/bin/tensorflow_model_server",
 "--port=9000",
-"--rest_api_port=8000",
+"--rest_api_port=9001",
 "--model_name=" + $.params.modelName,
 "--model_base_path=" + $.params.modelPath,
 ],
@@ -130,6 +125,9 @@
 {
 containerPort: 9000,
 },
+{
+containerPort: 9001,
+},
 ],
 // TODO(jlewi): We should add readiness and liveness probes. I think the blocker is that
 // model-server doesn't have something we can use out of the box.
@@ -176,7 +174,7 @@
 
 httpProxyContainer:: {
 name: $.params.name + "-http-proxy",
-image: $.params.httpProxyImageToUse,
+image: $.params.httpProxyImage,
 imagePullPolicy: "IfNotPresent",
 command: [
 "python",
@@ -193,12 +191,12 @@
 ],
 resources: {
 requests: {
-memory: "1Gi",
+memory: "500Mi",
-cpu: "1",
+cpu: "0.5",
 },
 limits: {
-memory: "4Gi",
+memory: "1Gi",
-cpu: "4",
+cpu: "1",
 },
 },
 securityContext: {
@@ -274,6 +272,11 @@
 port: 9000,
 targetPort: 9000,
 },
+{
+name: "rest-tf-serving",
+port: 9001,
+targetPort: 9001,
+},
 {
 name: "http-tf-serving-proxy",
 port: 8000,
@@ -33,7 +33,7 @@ def create_search_index(argv=None):
 with open(tmp_lookup_file, 'w') as lookup_file:
 lookup_writer = csv.writer(lookup_file)
 
-for csv_file_path in tf.gfile.Glob('{}/*.csv'.format(args.data_dir)):
+for csv_file_path in tf.gfile.Glob('{}/*index*.csv'.format(args.data_dir)):
 tf.logging.debug('Reading {}'.format(csv_file_path))
 
 with tf.gfile.Open(csv_file_path) as csv_file:
@@ -4,11 +4,9 @@ from six import StringIO
 from tensor2tensor.data_generators import generator_utils
 from tensor2tensor.data_generators import text_problems
 from tensor2tensor.utils import metrics
-from tensor2tensor.utils import registry
 import tensorflow as tf
 
 
-@registry.register_problem
 class GithubFunctionDocstring(text_problems.Text2TextProblem):
 """Function and Docstring similarity Problem.
 
|
||||||
@property
|
@property
|
||||||
def max_samples_for_vocab(self):
|
def max_samples_for_vocab(self):
|
||||||
# FIXME(sanyamkapoor): This exists to handle memory explosion.
|
# FIXME(sanyamkapoor): This exists to handle memory explosion.
|
||||||
return int(3.5e5)
|
return int(2e5)
|
||||||
|
|
||||||
def get_csv_files(self, _data_dir, tmp_dir, _dataset_split):
|
def get_csv_files(self, _data_dir, tmp_dir, _dataset_split):
|
||||||
return [
|
return [
|
||||||
|
|
|
||||||
|
|
@@ -2,7 +2,7 @@ from tensor2tensor.utils import registry
 from .function_docstring import GithubFunctionDocstring
 
 
-@registry.register_problem
+@registry.register_problem('cs_github_function_docstring')
 class GithubFunctionDocstringExtended(GithubFunctionDocstring):
 """Function Docstring problem with extended semantics.
 
@@ -7,7 +7,7 @@ from tensor2tensor.utils import t2t_model
 import tensorflow as tf
 
 
-@registry.register_model
+@registry.register_model('cs_similarity_transformer')
 class SimilarityTransformer(t2t_model.T2TModel):
 """Transformer Model for Similarity between two strings.
 
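Registering the problem and model under explicit `cs_`-prefixed names avoids clashing with the `github_function_docstring` problem and `similarity_transformer` model that ship with upstream tensor2tensor, since the default registration key is derived from the class name. The snippet below is a minimal illustration of that pattern, not taken from the repository, assuming tensor2tensor ~1.7; the class and key names are hypothetical.

```python
# Sketch only: explicit registry names avoid collisions with upstream T2T,
# which registers components under their snake_cased class names by default.
from tensor2tensor.utils import registry, t2t_model


@registry.register_model('cs_demo_transformer')  # hypothetical name
class CsDemoTransformer(t2t_model.T2TModel):
  """Placeholder model used only to illustrate named registration."""


# The class can later be looked up by the same key, e.g. from a --model flag.
model_cls = registry.model('cs_demo_transformer')
assert model_cls is CsDemoTransformer
```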
@@ -7,5 +7,5 @@ numpy~=1.14.0
 oauth2client~=4.1.0
 requests~=2.18.0
 spacy~=2.0.0
-tensor2tensor~=1.6.0
+tensor2tensor~=1.7.0
 tensorflow~=1.8.0