mirror of https://github.com/kubeflow/examples.git
add pipeline step to push to git (#387)
* add push to git
* small fixes
* work around .after()
* format
This commit is contained in:
parent
494fc05f16
commit
e8cf9c58ce
|
|
@ -16,12 +16,18 @@ ksEnvName="pipeline"
|
||||||
component="search-index-creator"
|
component="search-index-creator"
|
||||||
|
|
||||||
usage() {
|
usage() {
|
||||||
echo "Usage: launch_search_index_creator_job.sh --workingDir=<working dir> --workflowId=<workflow id invoking the container>
|
echo "Usage: launch_search_index_creator_job.sh
|
||||||
--dataDir=<data dir> --timeout=<timeout> --namespace=<kubernetes namespace> --cluster=<cluster to deploy job to> "
|
--workflowId=<workflow id invoking the container>
|
||||||
|
--indexFile=<index file>
|
||||||
|
--lookupFile=<lookup file>
|
||||||
|
--dataDir=<data dir>
|
||||||
|
--timeout=<timeout>
|
||||||
|
--namespace=<kubernetes namespace>
|
||||||
|
--cluster=<cluster to deploy job to>"
|
||||||
}
|
}
|
||||||
|
|
||||||
# List of required parameters
|
# List of required parameters
|
||||||
names=(workingDir workflowId dataDir namespace cluster)
|
names=(workflowId indexFile lookupFile dataDir namespace cluster)
|
||||||
|
|
||||||
source "${DIR}/parse_arguments.sh"
|
source "${DIR}/parse_arguments.sh"
|
||||||
source "${DIR}/initialize_kubectl.sh"
|
source "${DIR}/initialize_kubectl.sh"
|
||||||
|
|
@ -29,8 +35,8 @@ source "${DIR}/initialize_kubectl.sh"
|
||||||
# Apply parameters
|
# Apply parameters
|
||||||
ks param set ${component} dataDir ${dataDir} --env ${ksEnvName}
|
ks param set ${component} dataDir ${dataDir} --env ${ksEnvName}
|
||||||
ks param set ${component} jobNameSuffix ${workflowId} --env ${ksEnvName}
|
ks param set ${component} jobNameSuffix ${workflowId} --env ${ksEnvName}
|
||||||
ks param set ${component} lookupFile ${workingDir}/code-embeddings-index/embedding-to-info.csv --env ${ksEnvName}
|
ks param set ${component} lookupFile ${lookupFile} --env ${ksEnvName}
|
||||||
ks param set ${component} indexFile ${workingDir}/code-embeddings-index/embeddings.index --env ${ksEnvName}
|
ks param set ${component} indexFile ${indexFile} --env ${ksEnvName}
|
||||||
|
|
||||||
ks show ${ksEnvName} -c "${component}"
|
ks show ${ksEnvName} -c "${component}"
|
||||||
ks apply ${ksEnvName} -c "${component}"
|
ks apply ${ksEnvName} -c "${component}"
|
||||||
|
|
|
||||||
|
|
@ -20,9 +20,16 @@ numWorkers=5
|
||||||
workerMachineType=n1-highcpu-32
|
workerMachineType=n1-highcpu-32
|
||||||
|
|
||||||
usage() {
|
usage() {
|
||||||
echo "Usage: submit_code_embeddings_job.sh --workflowId=<workflow id invoking the container> --modelDir=<directory contains the model>
|
echo "Usage: submit_code_embeddings_job.sh
|
||||||
--dataDir=<data dir> --numWorkers=<num of workers> --project=<project> --targetDataset=<target BQ dataset>
|
--workflowId=<workflow id invoking the container>
|
||||||
--workerMachineType=<worker machine type> --workingDir=<working dir> --cluster=<cluster to deploy job to>"
|
--modelDir=<directory contains the model>
|
||||||
|
--dataDir=<data dir>
|
||||||
|
--numWorkers=<num of workers>
|
||||||
|
--project=<project>
|
||||||
|
--targetDataset=<target BQ dataset>
|
||||||
|
--workerMachineType=<worker machine type>
|
||||||
|
--workingDir=<working dir>
|
||||||
|
--cluster=<cluster to deploy job to>"
|
||||||
}
|
}
|
||||||
|
|
||||||
# List of required parameters
|
# List of required parameters
|
||||||
|
|
|
||||||
|
|
@ -10,13 +10,20 @@ DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" > /dev/null && pwd)"
|
||||||
branch=master
|
branch=master
|
||||||
|
|
||||||
usage() {
|
usage() {
|
||||||
echo "Usage: update_index.sh --branch=<base branch> --appDir=<ksonnet app dir>
|
echo "Usage: update_index.sh
|
||||||
--gitRepo=<github repo with Argo CD hooked up> --env=<ksonnet environment> --indexFile=<index file>
|
--baseGitRepo=<base git repo name>
|
||||||
--lookupFile=<lookup file> --workflowId=<workflow id invoking the container>"
|
--baseBranch=<base branch>
|
||||||
|
--appDir=<ksonnet app dir>
|
||||||
|
--forkGitRepo=<github repo with Argo CD hooked up>
|
||||||
|
--env=<ksonnet environment>
|
||||||
|
--indexFile=<index file>
|
||||||
|
--lookupFile=<lookup file>
|
||||||
|
--workflowId=<workflow id invoking the container>
|
||||||
|
--botEmail=<email account of the bot that send the PR>"
|
||||||
}
|
}
|
||||||
|
|
||||||
# List of required parameters
|
# List of required parameters
|
||||||
names=(appDir gitRepo env lookupFile indexFile workflowId)
|
names=(baseGitRepo baseBranch appDir forkGitRepo env indexFile lookupFile workflowId botEmail)
|
||||||
|
|
||||||
source "${DIR}/parse_arguments.sh"
|
source "${DIR}/parse_arguments.sh"
|
||||||
|
|
||||||
|
|
@ -29,9 +36,13 @@ if [ -z ${dryrun} ]; then
|
||||||
fi
|
fi
|
||||||
|
|
||||||
|
|
||||||
git config --global user.email pipeline@localhost
|
git config --global user.email ${botEmail}
|
||||||
git clone -b ${branch} https://${GITHUB_TOKEN}@github.com/${gitRepo}.git repo && cd repo/${appDir}
|
git clone https://${GITHUB_TOKEN}@github.com/${forkGitRepo}.git repo && cd repo/${appDir}
|
||||||
git config credential.helper store
|
git config credential.helper store
|
||||||
|
git remote add upstream https://github.com/${baseGitRepo}.git
|
||||||
|
git fetch upstream
|
||||||
|
git merge upstream/${baseBranch} master
|
||||||
|
|
||||||
git checkout -b ${workflowId}
|
git checkout -b ${workflowId}
|
||||||
ks param set --env=${env} search-index-server indexFile ${indexFile}
|
ks param set --env=${env} search-index-server indexFile ${indexFile}
|
||||||
ks param set --env=${env} search-index-server lookupFile ${lookupFile}
|
ks param set --env=${env} search-index-server lookupFile ${lookupFile}
|
||||||
|
|
@ -51,7 +62,7 @@ EOF
|
||||||
# Create a pull request
|
# Create a pull request
|
||||||
if (! ${dryrun}); then
|
if (! ${dryrun}); then
|
||||||
git push origin ${workflowId}
|
git push origin ${workflowId}
|
||||||
hub pull-request --base=${gitRepo}:${branch} -F ${FILE}
|
hub pull-request --base=${baseGitRepo}:${baseBranch} -F ${FILE}
|
||||||
else
|
else
|
||||||
echo "dry run; not committing to git."
|
echo "dry run; not committing to git."
|
||||||
fi
|
fi
|
||||||
|
|
|
||||||
|
|
@ -64,7 +64,7 @@ def dataflow_function_embedding_op(
|
||||||
num_workers: int, working_dir: str, step_name='dataflow_function_embedding'):
|
num_workers: int, working_dir: str, step_name='dataflow_function_embedding'):
|
||||||
return default_gcp_op(
|
return default_gcp_op(
|
||||||
name=step_name,
|
name=step_name,
|
||||||
image='gcr.io/kubeflow-examples/code-search-ks:v20181127-08f8c05-dirty-19ca4c',
|
image='gcr.io/kubeflow-examples/code-search/ks:v20181130-b807843',
|
||||||
command=['/usr/local/src/submit_code_embeddings_job.sh'],
|
command=['/usr/local/src/submit_code_embeddings_job.sh'],
|
||||||
arguments=[
|
arguments=[
|
||||||
"--workflowId=%s" % workflow_id,
|
"--workflowId=%s" % workflow_id,
|
||||||
|
|
@ -81,14 +81,16 @@ def dataflow_function_embedding_op(
|
||||||
|
|
||||||
|
|
||||||
def search_index_creator_op(
|
def search_index_creator_op(
|
||||||
working_dir: str, data_dir: str, workflow_id: str, cluster_name: str, namespace: str):
|
index_file: str, lookup_file: str, data_dir: str,
|
||||||
|
workflow_id: str, cluster_name: str, namespace: str):
|
||||||
return dsl.ContainerOp(
|
return dsl.ContainerOp(
|
||||||
# use component name as step name
|
# use component name as step name
|
||||||
name='search_index_creator',
|
name='search_index_creator',
|
||||||
image='gcr.io/kubeflow-examples/code-search-ks:v20181127-08f8c05-dirty-19ca4c',
|
image='gcr.io/kubeflow-examples/code-search/ks:v20181130-b807843',
|
||||||
command=['/usr/local/src/launch_search_index_creator_job.sh'],
|
command=['/usr/local/src/launch_search_index_creator_job.sh'],
|
||||||
arguments=[
|
arguments=[
|
||||||
'--workingDir=%s' % working_dir,
|
'--indexFile=%s' % index_file,
|
||||||
|
'--lookupFile=%s' % lookup_file,
|
||||||
'--dataDir=%s' % data_dir,
|
'--dataDir=%s' % data_dir,
|
||||||
'--workflowId=%s' % workflow_id,
|
'--workflowId=%s' % workflow_id,
|
||||||
'--cluster=%s' % cluster_name,
|
'--cluster=%s' % cluster_name,
|
||||||
|
|
@ -97,29 +99,85 @@ def search_index_creator_op(
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
|
def update_index_op(
|
||||||
|
base_git_repo: str, base_branch: str, app_dir: str, fork_git_repo: str,
|
||||||
|
index_file: str, lookup_file: str, workflow_id: str, bot_email: str):
|
||||||
|
return (
|
||||||
|
dsl.ContainerOp(
|
||||||
|
name='update_index',
|
||||||
|
image='gcr.io/kubeflow-examples/code-search/ks:v20181130-b807843',
|
||||||
|
command=['/usr/local/src/update_index.sh'],
|
||||||
|
arguments=[
|
||||||
|
'--baseGitRepo=%s' % base_git_repo,
|
||||||
|
'--baseBranch=%s' % base_branch,
|
||||||
|
'--appDir=%s' % app_dir,
|
||||||
|
'--forkGitRepo=%s' % fork_git_repo,
|
||||||
|
'--env=%s' % 'pipeline',
|
||||||
|
'--indexFile=%s' % index_file,
|
||||||
|
'--lookupFile=%s' % lookup_file,
|
||||||
|
'--workflowId=%s' % workflow_id,
|
||||||
|
'--botEmail=%s' % bot_email,
|
||||||
|
],
|
||||||
|
)
|
||||||
|
.add_volume(
|
||||||
|
k8s_client.V1Volume(
|
||||||
|
name='github-access-token',
|
||||||
|
secret=k8s_client.V1SecretVolumeSource(
|
||||||
|
secret_name='github-access-token'
|
||||||
|
)
|
||||||
|
)
|
||||||
|
)
|
||||||
|
.add_env_variable(
|
||||||
|
k8s_client.V1EnvVar(
|
||||||
|
name='GITHUB_TOKEN',
|
||||||
|
value_from=k8s_client.V1EnvVarSource(
|
||||||
|
secret_key_ref=k8s_client.V1SecretKeySelector(
|
||||||
|
name='github-access-token',
|
||||||
|
key='token',
|
||||||
|
)
|
||||||
|
)
|
||||||
|
)
|
||||||
|
)
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
# The pipeline definition
|
# The pipeline definition
|
||||||
@dsl.pipeline(
|
@dsl.pipeline(
|
||||||
name='function_embedding',
|
name='function_embedding',
|
||||||
description='Example function embedding pipeline'
|
description='Example function embedding pipeline'
|
||||||
)
|
)
|
||||||
def function_embedding_update(
|
def function_embedding_update(
|
||||||
project,
|
project='code-search-demo',
|
||||||
working_dir,
|
cluster_name='cs-demo-1103',
|
||||||
saved_model_dir,
|
namespace='kubeflow',
|
||||||
cluster_name,
|
working_dir='gs://code-search-demo/pipeline',
|
||||||
namespace,
|
data_dir='gs://code-search-demo/20181104/data',
|
||||||
target_dataset=dsl.PipelineParam(name='target-dataset', value='code_search'),
|
saved_model_dir='gs://code-search-demo/models/20181107-dist-sync-gpu/export/1541712907/',
|
||||||
worker_machine_type=dsl.PipelineParam(name='worker-machine-type', value='n1-highcpu-32'),
|
target_dataset='code_search',
|
||||||
num_workers=dsl.PipelineParam(name='num-workers', value=5)):
|
worker_machine_type='n1-highcpu-32',
|
||||||
|
function_embedding_num_workers=5,
|
||||||
|
base_git_repo='kubeflow/examples',
|
||||||
|
base_branch='master',
|
||||||
|
app_dir='code_search/ks-web-app',
|
||||||
|
fork_git_repo='IronPan/examples',
|
||||||
|
bot_email='kf.sample.bot@gmail.com'):
|
||||||
workflow_name = '{{workflow.name}}'
|
workflow_name = '{{workflow.name}}'
|
||||||
working_dir = '%s/%s' % (working_dir, workflow_name)
|
working_dir = '%s/%s' % (working_dir, workflow_name)
|
||||||
data_dir = '%s/data' % working_dir
|
lookup_file = '%s/code-embeddings-index/embedding-to-info.csv' % working_dir
|
||||||
|
index_file = '%s/code-embeddings-index/embeddings.index'% working_dir
|
||||||
|
|
||||||
function_embedding = dataflow_function_embedding_op(
|
function_embedding = dataflow_function_embedding_op(
|
||||||
project, cluster_name, target_dataset, data_dir,
|
project, cluster_name, target_dataset, data_dir,
|
||||||
saved_model_dir,
|
saved_model_dir, workflow_name, worker_machine_type,
|
||||||
workflow_name, worker_machine_type, num_workers, working_dir)
|
function_embedding_num_workers, working_dir)
|
||||||
search_index_creator_op(
|
|
||||||
working_dir, data_dir, workflow_name, cluster_name, namespace).after(function_embedding)
|
search_index_creator = search_index_creator_op(
|
||||||
|
index_file, lookup_file, data_dir, workflow_name, cluster_name, namespace)
|
||||||
|
search_index_creator.after(function_embedding)
|
||||||
|
update_index_op(
|
||||||
|
base_git_repo, base_branch, app_dir, fork_git_repo,
|
||||||
|
index_file, lookup_file, workflow_name, bot_email)\
|
||||||
|
.after(search_index_creator)
|
||||||
|
|
||||||
|
|
||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue