mirror of https://github.com/kubeflow/examples.git
Create a script to update the index and lookup file used to serve predictions. (#352)
* This script will be the last step in a pipeline to continuously update the index for serving. * The script updates the parameters of the search index server to point to the supplied index files. It then commits them and creates a PR to push those commits. * Restructure the parameters for the search index server so that we can use ks param set to override the indexFile and lookupFile. * We do this because we want to be able to push a new index by doing ks param set in a continuously running pipeline * Remove default parameters from search-index-server * Create a dockerfile suitable for running this script.
This commit is contained in:
parent
4f95e85e63
commit
5d6a4e9d71
|
|
@ -72,6 +72,17 @@ build-ui-gcb:
|
|||
gcloud builds submit --machine-type=n1-highcpu-32 --project=kubeflow-ci --config=./build/build.ui.json \
|
||||
--timeout=3600 ./build
|
||||
|
||||
# Build the index_updater image with Google Cloud Build.
# Renders docker/index_updater/build.jsonnet (with the gitVersion and tag
# external variables) into ./build/build.index_updater.json, copies ./docker
# and ./src into ./build as the build context, removes the test_data
# directories from the copied sources, and submits the build to the
# kubeflow-ci project.
build-index-updater-gcb:
|
||||
mkdir -p build
|
||||
jsonnet ./docker/index_updater/build.jsonnet --ext-str gitVersion=$(GIT_VERSION) --ext-str tag=$(TAG) \
|
||||
> ./build/build.index_updater.json
|
||||
cp -r ./docker ./build/
|
||||
cp -r ./src ./build/
|
||||
rm -rf ./build/src/code_search/dataflow/cli/test_data
|
||||
rm -rf ./build/src/code_search/t2t/test_data
|
||||
gcloud builds submit --machine-type=n1-highcpu-32 --project=kubeflow-ci --config=./build/build.index_updater.json \
|
||||
--timeout=3600 ./build
|
||||
|
||||
# Build but don't attach the latest tag. This allows manual testing/inspection of the image
|
||||
# first.
|
||||
push-cpu: build-cpu
|
||||
|
|
|
|||
|
|
@ -0,0 +1,9 @@
|
|||
# Image providing the command line tools needed to open the index-update
# pull request: git plus the hub GitHub CLI.
#
# NOTE(review): update_index.sh also invokes the ks (ksonnet) CLI, and neither
# ks nor the script itself is added by this Dockerfile -- confirm where they
# are expected to come from.
FROM ubuntu:xenial

# hub is a wrapper around git, and the update script runs
# git add / git commit / git push directly, so git must be present.
# ca-certificates is listed explicitly because the hub release is downloaded
# over HTTPS.
RUN apt-get update && \
    apt-get install -y ca-certificates git wget && \
    rm -rf /var/lib/apt/lists/*

# Install hub v2.6.0 under /usr/local and expose it on the PATH.
# The tarball is removed in the same layer so it does not bloat the image.
RUN wget -O /tmp/hub-linux-amd64-2.6.0.tgz https://github.com/github/hub/releases/download/v2.6.0/hub-linux-amd64-2.6.0.tgz && \
    cd /usr/local && \
    tar -xvf /tmp/hub-linux-amd64-2.6.0.tgz && \
    rm -f /tmp/hub-linux-amd64-2.6.0.tgz && \
    ln -sf /usr/local/hub-linux-amd64-2.6.0/bin/hub /usr/local/bin/hub
|
||||
|
|
@ -0,0 +1,3 @@
|
|||
# Index Updater
|
||||
|
||||
A Docker image and script for updating the nmslib index served by search-index-server.
|
||||
|
|
@ -0,0 +1,26 @@
|
|||
// TODO(jlewi): We should tag the image latest and then
|
||||
// use latest as a cache so that rebuilds are fast
|
||||
// https://cloud.google.com/cloud-build/docs/speeding-up-builds#using_a_cached_docker_image
|
||||
// Cloud Build configuration for the index_updater image.
//
// External variables:
//   tag        - image tag applied to the freshly built image.
//   gitVersion - value recorded in the image's git-versions label.
local image = "gcr.io/kubeflow-examples/code-search/index_updater";
local taggedImage = image + ":" + std.extVar("tag");
local latestImage = image + ":latest";

{
  // Build the image, then alias the new tag as latest.
  steps: [
    {
      id: "build",
      name: "gcr.io/cloud-builders/docker",
      args: [
        "build",
        "-t",
        taggedImage,
        "--label=git-versions=" + std.extVar("gitVersion"),
        "--file=docker/index_updater/Dockerfile",
        ".",
      ],
    },
    {
      id: "tag",
      name: "gcr.io/cloud-builders/docker",
      args: ["tag", taggedImage, latestImage],
      waitFor: ["build"],
    },
  ],
  // Both tags are listed as build images.
  images: [taggedImage, latestImage],
}
|
||||
|
|
@ -0,0 +1,95 @@
|
|||
#!/bin/bash
|
||||
#
|
||||
# This script creates a PR updating the nmslib index used by search-index-server.
|
||||
# It uses ks CLI to update the parameters.
|
||||
# After creating and pushing a commit it uses the hub github CLI to create a PR.
|
||||
#
|
||||
# The argument --base can be used to change the owner/org of the repo the PR is opened on.
|
||||
# To use the main kubeflow/examples repo use
|
||||
# --base=kubeflow:master
|
||||
#
|
||||
# To use user alex's fork use
|
||||
# --base=alex:master
|
||||
set -ex
|
||||
|
||||
DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" > /dev/null && pwd)"
|
||||
|
||||
# Parse command line options of the form --{name}={value} or --{name}.
#
# Every recognised option is stored in a global shell variable called {name};
# a bare --{name} flag stores the string "true". Arguments matching neither
# form are reported on stdout and skipped.
#
# Arguments:
#   "$@" - the options to parse.
# Returns:
#   0 on success, 1 if an option name is not a valid shell variable name.
parseArgs() {
  # Deliberately unusual local names: printf -v assigns to the nearest
  # variable in scope, so plain "name"/"value" locals would swallow options
  # called --name or --value.
  local __opt_name __opt_value

  while [[ $# -gt 0 ]]; do
    # Parameters should be of the form
    # --{name}=${value}
    echo parsing "$1"
    if [[ $1 =~ ^--([^=]+)=(.*)$ ]]; then
      # Split on the FIRST "=" so values may themselves contain "=".
      __opt_name=${BASH_REMATCH[1]}
      __opt_value=${BASH_REMATCH[2]}
    elif [[ $1 =~ ^--(.+)$ ]]; then
      __opt_name=${BASH_REMATCH[1]}
      __opt_value=true
    else
      echo "Argument $1 did not match the pattern --{name}={value} or --{name}"
      shift
      continue
    fi

    if [[ ! ${__opt_name} =~ ^[A-Za-z_][A-Za-z0-9_]*$ ]]; then
      echo "Option name '${__opt_name}' in argument $1 is not a valid variable name" >&2
      return 1
    fi

    # Assign without eval so the value is never re-parsed by the shell:
    # spaces, quotes and $(...) in the value are stored literally.
    printf -v "${__opt_name}" '%s' "${__opt_value}"
    shift
  done
}
|
||||
|
||||
# Print a one-line synopsis of the supported command line flags to stdout.
usage() {
  printf '%s\n' "Usage: update_index.sh --base=OWNER:branch --appDir=<ksonnet app dir> --env=<ksonnet environment> --indexFile=<index file> --lookupFile=<lookup file>"
}
|
||||
|
||||
# Main flow: parse the flags, validate them, point the search-index-server
# ksonnet component at the new index/lookup files, then (unless --dryrun is
# set) commit, push and open a pull request with hub.

# "$@" keeps every argument intact; $* would re-split values containing spaces.
parseArgs "$@"

# --help only prints the usage text; do not fall through to the real work.
if [[ -n "${help:-}" ]]; then
  usage
  exit 0
fi

# dryrun defaults to false and is compared as a string rather than being
# executed as a command.
dryrun="${dryrun:-false}"
if [[ "${dryrun}" != "true" && "${dryrun}" != "false" ]]; then
  echo "--dryrun must be true or false, got '${dryrun}'"
  usage
  exit 1
fi

# List of required parameters
names=(appDir env lookupFile indexFile base)

missingParam=false
for i in "${names[@]}"; do
  # ${!i} is indirect expansion: the value of the variable whose name is in i.
  if [[ -z "${!i:-}" ]]; then
    echo "--${i} not set"
    missingParam=true
  fi
done

if [[ "${missingParam}" == "true" ]]; then
  usage
  exit 1
fi

cd "${appDir}"
ks param set --env="${env}" search-index-server indexFile "${indexFile}"
ks param set --env="${env}" search-index-server lookupFile "${lookupFile}"
git add .

if [[ "${dryrun}" == "true" ]]; then
  echo "dryrun; not committing to git."
else
  git commit -m "Update the lookup and index file."
  git push

  # Write the PR title and description to a temporary file outside the git
  # working tree, and remove it when the script exits.
  FILE=$(mktemp "${TMPDIR:-/tmp}/tmp.create_pull_request.XXXXXX")
  trap 'rm -f "${FILE}"' EXIT

  cat <<EOF >"${FILE}"
Update the lookup and index file.

This PR is automatically generated by update_index.sh.

This PR updates the index and lookup file used to serve
predictions.
EOF

  # Create a pull request
  hub pull-request --base="${base}" -F "${FILE}"
fi
|
||||
|
|
@ -97,11 +97,9 @@
|
|||
indexFile: $.components["t2t-code-search"].workingDir + "/code_search_index.nmslib",
|
||||
},
|
||||
"search-index-server": {
|
||||
// Most defaults should be defined in experiments.libsonnet.
|
||||
// Parameters will be used to override those values.
|
||||
name: "search-index-server",
|
||||
problem: $.components["t2t-code-search"].problem,
|
||||
dataDir: $.components["t2t-code-search"].workingDir + "/data",
|
||||
lookupFile: $.components["t2t-code-search"].workingDir + "/code_search_index.csv",
|
||||
indexFile: $.components["t2t-code-search"].workingDir + "/code_search_index.nmslib",
|
||||
servingUrl: "http://t2t-code-search.kubeflow:8500/v1/models/t2t-code-search:predict",
|
||||
// 1 replica is convenient for debugging but we should bump after debugging.
|
||||
replicas: 1,
|
||||
|
|
|
|||
|
|
@ -7,9 +7,10 @@ local experiments = import "experiments.libsonnet";
|
|||
|
||||
local experimentName = baseParams.experiment;
|
||||
local experimentParams = experiments[experimentName];
|
||||
local params = baseParams + experimentParams + {
|
||||
name: "search-index-server",
|
||||
};
|
||||
|
||||
// baseParams override experiment parameters because we want to be able to set a new
|
||||
// index and csv file by doing ks param set.
|
||||
local params = experimentParams + baseParams;
|
||||
|
||||
local deploymentSpec = {
|
||||
apiVersion: "extensions/v1beta1",
|
||||
|
|
|
|||
|
|
@ -1,14 +1,15 @@
|
|||
local params = std.extVar("__ksonnet/params");
|
||||
local globals = import "globals.libsonnet";
|
||||
local params = std.extVar('__ksonnet/params');
|
||||
local globals = import 'globals.libsonnet';
|
||||
local envParams = params {
|
||||
components+: {
|
||||
"t2t-code-search"+: {
|
||||
},
|
||||
"t2t-code-search"+: {},
|
||||
"t2t-code-search-datagen"+: {
|
||||
githubTable: "",
|
||||
githubTable: '',
|
||||
},
|
||||
"submit-preprocess-job"+: {
|
||||
githubTable: "",
|
||||
githubTable: '',
|
||||
},
|
||||
"search-index-server"+: {
|
||||
},
|
||||
},
|
||||
};
|
||||
|
|
@ -18,4 +19,4 @@ local envParams = params {
|
|||
[x]: envParams.components[x] + globals
|
||||
for x in std.objectFields(envParams.components)
|
||||
},
|
||||
}
|
||||
}
|
||||
Loading…
Reference in New Issue