add pipeline step to push to git (#387)

* add push to git

* small fixes

* work around .after()

* format
Authored by IronPan on 2018-12-02 09:37:21 -08:00; committed by Kubernetes Prow Robot
parent 494fc05f16
commit e8cf9c58ce
4 changed files with 114 additions and 32 deletions
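
The "work around .after()" note refers to how step ordering is declared in the pipeline below: search_index_creator_op(...) is now assigned to a variable and .after(function_embedding) is called on it as a separate statement, and the new update_index step declares its dependency on the index creator the same way, since no data is passed between the containers. A minimal sketch of that ordering pattern in the Kubeflow Pipelines DSL; the step names, image, and commands are placeholders, not the real pipeline steps:

import kfp.dsl as dsl


@dsl.pipeline(name='ordering_demo', description='Explicit step ordering with .after()')
def ordering_demo():
    # Two hypothetical steps with no data dependency between them.
    build_index = dsl.ContainerOp(
        name='build_index',
        image='alpine:3.8',
        command=['sh', '-c', 'echo "create search index"'])
    push_to_git = dsl.ContainerOp(
        name='push_to_git',
        image='alpine:3.8',
        command=['sh', '-c', 'echo "push updated ksonnet params to git"'])
    # Keep a handle to the upstream op and call .after() on the downstream op
    # so Argo schedules them sequentially.
    push_to_git.after(build_index)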

File: launch_search_index_creator_job.sh

@@ -16,12 +16,18 @@ ksEnvName="pipeline"
component="search-index-creator"
usage() {
echo "Usage: launch_search_index_creator_job.sh --workingDir=<working dir> --workflowId=<workflow id invoking the container>
--dataDir=<data dir> --timeout=<timeout> --namespace=<kubernetes namespace> --cluster=<cluster to deploy job to> "
echo "Usage: launch_search_index_creator_job.sh
--workflowId=<workflow id invoking the container>
--indexFile=<index file>
--lookupFile=<lookup file>
--dataDir=<data dir>
--timeout=<timeout>
--namespace=<kubernetes namespace>
--cluster=<cluster to deploy job to>"
}
# List of required parameters
-names=(workingDir workflowId dataDir namespace cluster)
+names=(workflowId indexFile lookupFile dataDir namespace cluster)
source "${DIR}/parse_arguments.sh"
source "${DIR}/initialize_kubectl.sh"
@@ -29,8 +35,8 @@ source "${DIR}/initialize_kubectl.sh"
# Apply parameters
ks param set ${component} dataDir ${dataDir} --env ${ksEnvName}
ks param set ${component} jobNameSuffix ${workflowId} --env ${ksEnvName}
-ks param set ${component} lookupFile ${workingDir}/code-embeddings-index/embedding-to-info.csv --env ${ksEnvName}
-ks param set ${component} indexFile ${workingDir}/code-embeddings-index/embeddings.index --env ${ksEnvName}
+ks param set ${component} lookupFile ${lookupFile} --env ${ksEnvName}
+ks param set ${component} indexFile ${indexFile} --env ${ksEnvName}
ks show ${ksEnvName} -c "${component}"
ks apply ${ksEnvName} -c "${component}"

File: submit_code_embeddings_job.sh

@@ -20,9 +20,16 @@ numWorkers=5
workerMachineType=n1-highcpu-32
usage() {
echo "Usage: submit_code_embeddings_job.sh --workflowId=<workflow id invoking the container> --modelDir=<directory contains the model>
--dataDir=<data dir> --numWorkers=<num of workers> --project=<project> --targetDataset=<target BQ dataset>
--workerMachineType=<worker machine type> --workingDir=<working dir> --cluster=<cluster to deploy job to>"
echo "Usage: submit_code_embeddings_job.sh
--workflowId=<workflow id invoking the container>
--modelDir=<directory contains the model>
--dataDir=<data dir>
--numWorkers=<num of workers>
--project=<project>
--targetDataset=<target BQ dataset>
--workerMachineType=<worker machine type>
--workingDir=<working dir>
--cluster=<cluster to deploy job to>"
}
# List of required parameters

File: update_index.sh

@@ -10,13 +10,20 @@ DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" > /dev/null && pwd)"
branch=master
usage() {
echo "Usage: update_index.sh --branch=<base branch> --appDir=<ksonnet app dir>
--gitRepo=<github repo with Argo CD hooked up> --env=<ksonnet environment> --indexFile=<index file>
--lookupFile=<lookup file> --workflowId=<workflow id invoking the container>"
echo "Usage: update_index.sh
--baseGitRepo=<base git repo name>
--baseBranch=<base branch>
--appDir=<ksonnet app dir>
--forkGitRepo=<github repo with Argo CD hooked up>
--env=<ksonnet environment>
--indexFile=<index file>
--lookupFile=<lookup file>
--workflowId=<workflow id invoking the container>
--botEmail=<email account of the bot that send the PR>"
}
# List of required parameters
-names=(appDir gitRepo env lookupFile indexFile workflowId)
+names=(baseGitRepo baseBranch appDir forkGitRepo env indexFile lookupFile workflowId botEmail)
source "${DIR}/parse_arguments.sh"
@@ -29,9 +36,13 @@ if [ -z ${dryrun} ]; then
fi
-git config --global user.email pipeline@localhost
-git clone -b ${branch} https://${GITHUB_TOKEN}@github.com/${gitRepo}.git repo && cd repo/${appDir}
+git config --global user.email ${botEmail}
+git clone https://${GITHUB_TOKEN}@github.com/${forkGitRepo}.git repo && cd repo/${appDir}
+git config credential.helper store
+git remote add upstream https://github.com/${baseGitRepo}.git
+git fetch upstream
+git merge upstream/${baseBranch} master
git checkout -b ${workflowId}
ks param set --env=${env} search-index-server indexFile ${indexFile}
ks param set --env=${env} search-index-server lookupFile ${lookupFile}
@@ -51,7 +62,7 @@ EOF
# Create a pull request
if (! ${dryrun}); then
git push origin ${workflowId}
-hub pull-request --base=${gitRepo}:${branch} -F ${FILE}
+hub pull-request --base=${baseGitRepo}:${baseBranch} -F ${FILE}
else
echo "dry run; not committing to git."
fi
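
The rewritten update_index.sh clones the fork, merges in the upstream base branch, pushes a branch named after the workflow, and opens a pull request against the base repo with hub. It authenticates with a GITHUB_TOKEN environment variable, which the pipeline injects from a Kubernetes secret named github-access-token (see the update_index_op diff below). A minimal sketch of creating that secret with the Kubernetes Python client, assuming the pipeline runs in the kubeflow namespace and the token is available locally; this is equivalent to kubectl create secret generic github-access-token --from-literal=token=<token>:

from kubernetes import client, config


def create_github_token_secret(token, namespace='kubeflow'):
    # Use config.load_incluster_config() instead when running inside the cluster.
    config.load_kube_config()
    secret = client.V1Secret(
        metadata=client.V1ObjectMeta(name='github-access-token'),
        # update_index_op reads the token from the 'token' key of this secret.
        string_data={'token': token},
    )
    client.CoreV1Api().create_namespaced_secret(namespace=namespace, body=secret)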

File: function_embedding pipeline (Python)

@@ -64,7 +64,7 @@ def dataflow_function_embedding_op(
num_workers: int, working_dir: str, step_name='dataflow_function_embedding'):
return default_gcp_op(
name=step_name,
-image='gcr.io/kubeflow-examples/code-search-ks:v20181127-08f8c05-dirty-19ca4c',
+image='gcr.io/kubeflow-examples/code-search/ks:v20181130-b807843',
command=['/usr/local/src/submit_code_embeddings_job.sh'],
arguments=[
"--workflowId=%s" % workflow_id,
@@ -81,14 +81,16 @@ def dataflow_function_embedding_op(
def search_index_creator_op(
-working_dir: str, data_dir: str, workflow_id: str, cluster_name: str, namespace: str):
+index_file: str, lookup_file: str, data_dir: str,
+workflow_id: str, cluster_name: str, namespace: str):
return dsl.ContainerOp(
# use component name as step name
name='search_index_creator',
-image='gcr.io/kubeflow-examples/code-search-ks:v20181127-08f8c05-dirty-19ca4c',
+image='gcr.io/kubeflow-examples/code-search/ks:v20181130-b807843',
command=['/usr/local/src/launch_search_index_creator_job.sh'],
arguments=[
-'--workingDir=%s' % working_dir,
+'--indexFile=%s' % index_file,
+'--lookupFile=%s' % lookup_file,
'--dataDir=%s' % data_dir,
'--workflowId=%s' % workflow_id,
'--cluster=%s' % cluster_name,
@@ -97,29 +99,85 @@ def search_index_creator_op(
)
+def update_index_op(
+base_git_repo: str, base_branch: str, app_dir: str, fork_git_repo: str,
+index_file: str, lookup_file: str, workflow_id: str, bot_email: str):
+return (
+dsl.ContainerOp(
+name='update_index',
+image='gcr.io/kubeflow-examples/code-search/ks:v20181130-b807843',
+command=['/usr/local/src/update_index.sh'],
+arguments=[
+'--baseGitRepo=%s' % base_git_repo,
+'--baseBranch=%s' % base_branch,
+'--appDir=%s' % app_dir,
+'--forkGitRepo=%s' % fork_git_repo,
+'--env=%s' % 'pipeline',
+'--indexFile=%s' % index_file,
+'--lookupFile=%s' % lookup_file,
+'--workflowId=%s' % workflow_id,
+'--botEmail=%s' % bot_email,
+],
+)
+.add_volume(
+k8s_client.V1Volume(
+name='github-access-token',
+secret=k8s_client.V1SecretVolumeSource(
+secret_name='github-access-token'
+)
+)
+)
+.add_env_variable(
+k8s_client.V1EnvVar(
+name='GITHUB_TOKEN',
+value_from=k8s_client.V1EnvVarSource(
+secret_key_ref=k8s_client.V1SecretKeySelector(
+name='github-access-token',
+key='token',
+)
+)
+)
+)
+)
# The pipeline definition
@dsl.pipeline(
name='function_embedding',
description='Example function embedding pipeline'
)
def function_embedding_update(
-project,
-working_dir,
-saved_model_dir,
-cluster_name,
-namespace,
-target_dataset=dsl.PipelineParam(name='target-dataset', value='code_search'),
-worker_machine_type=dsl.PipelineParam(name='worker-machine-type', value='n1-highcpu-32'),
-num_workers=dsl.PipelineParam(name='num-workers', value=5)):
+project='code-search-demo',
+cluster_name='cs-demo-1103',
+namespace='kubeflow',
+working_dir='gs://code-search-demo/pipeline',
+data_dir='gs://code-search-demo/20181104/data',
+saved_model_dir='gs://code-search-demo/models/20181107-dist-sync-gpu/export/1541712907/',
+target_dataset='code_search',
+worker_machine_type='n1-highcpu-32',
+function_embedding_num_workers=5,
+base_git_repo='kubeflow/examples',
+base_branch='master',
+app_dir='code_search/ks-web-app',
+fork_git_repo='IronPan/examples',
+bot_email='kf.sample.bot@gmail.com'):
workflow_name = '{{workflow.name}}'
working_dir = '%s/%s' % (working_dir, workflow_name)
-data_dir = '%s/data' % working_dir
+lookup_file = '%s/code-embeddings-index/embedding-to-info.csv' % working_dir
+index_file = '%s/code-embeddings-index/embeddings.index'% working_dir
function_embedding = dataflow_function_embedding_op(
project, cluster_name, target_dataset, data_dir,
-saved_model_dir,
-workflow_name, worker_machine_type, num_workers, working_dir)
-search_index_creator_op(
-working_dir, data_dir, workflow_name, cluster_name, namespace).after(function_embedding)
+saved_model_dir, workflow_name, worker_machine_type,
+function_embedding_num_workers, working_dir)
+search_index_creator = search_index_creator_op(
+index_file, lookup_file, data_dir, workflow_name, cluster_name, namespace)
+search_index_creator.after(function_embedding)
+update_index_op(
+base_git_repo, base_branch, app_dir, fork_git_repo,
+index_file, lookup_file, workflow_name, bot_email)\
+.after(search_index_creator)
if __name__ == '__main__':
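
The file ends with a __main__ guard (truncated in this view). A typical entry point for a KFP pipeline compiles the decorated function into an Argo workflow package; a minimal sketch of what that block usually looks like, with an arbitrary output filename and not necessarily the exact contents of the truncated code:

import kfp.compiler as compiler

if __name__ == '__main__':
    # Compile function_embedding_update into a package that can be uploaded to
    # the Kubeflow Pipelines UI or submitted with the kfp client.
    compiler.Compiler().compile(function_embedding_update, 'function_embedding.tar.gz')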