mirror of https://github.com/kubeflow/examples.git
add pipeline step to push to git (#387)
* add push to git * small fixes * work around .after() * format
This commit is contained in:
parent
494fc05f16
commit
e8cf9c58ce
|
@ -16,12 +16,18 @@ ksEnvName="pipeline"
|
|||
component="search-index-creator"
|
||||
|
||||
usage() {
|
||||
echo "Usage: launch_search_index_creator_job.sh --workingDir=<working dir> --workflowId=<workflow id invoking the container>
|
||||
--dataDir=<data dir> --timeout=<timeout> --namespace=<kubernetes namespace> --cluster=<cluster to deploy job to> "
|
||||
echo "Usage: launch_search_index_creator_job.sh
|
||||
--workflowId=<workflow id invoking the container>
|
||||
--indexFile=<index file>
|
||||
--lookupFile=<lookup file>
|
||||
--dataDir=<data dir>
|
||||
--timeout=<timeout>
|
||||
--namespace=<kubernetes namespace>
|
||||
--cluster=<cluster to deploy job to>"
|
||||
}
|
||||
|
||||
# List of required parameters
|
||||
names=(workingDir workflowId dataDir namespace cluster)
|
||||
names=(workflowId indexFile lookupFile dataDir namespace cluster)
|
||||
|
||||
source "${DIR}/parse_arguments.sh"
|
||||
source "${DIR}/initialize_kubectl.sh"
|
||||
|
@ -29,8 +35,8 @@ source "${DIR}/initialize_kubectl.sh"
|
|||
# Apply parameters
|
||||
ks param set ${component} dataDir ${dataDir} --env ${ksEnvName}
|
||||
ks param set ${component} jobNameSuffix ${workflowId} --env ${ksEnvName}
|
||||
ks param set ${component} lookupFile ${workingDir}/code-embeddings-index/embedding-to-info.csv --env ${ksEnvName}
|
||||
ks param set ${component} indexFile ${workingDir}/code-embeddings-index/embeddings.index --env ${ksEnvName}
|
||||
ks param set ${component} lookupFile ${lookupFile} --env ${ksEnvName}
|
||||
ks param set ${component} indexFile ${indexFile} --env ${ksEnvName}
|
||||
|
||||
ks show ${ksEnvName} -c "${component}"
|
||||
ks apply ${ksEnvName} -c "${component}"
|
||||
|
|
|
@ -20,9 +20,16 @@ numWorkers=5
|
|||
workerMachineType=n1-highcpu-32
|
||||
|
||||
usage() {
|
||||
echo "Usage: submit_code_embeddings_job.sh --workflowId=<workflow id invoking the container> --modelDir=<directory contains the model>
|
||||
--dataDir=<data dir> --numWorkers=<num of workers> --project=<project> --targetDataset=<target BQ dataset>
|
||||
--workerMachineType=<worker machine type> --workingDir=<working dir> --cluster=<cluster to deploy job to>"
|
||||
echo "Usage: submit_code_embeddings_job.sh
|
||||
--workflowId=<workflow id invoking the container>
|
||||
--modelDir=<directory contains the model>
|
||||
--dataDir=<data dir>
|
||||
--numWorkers=<num of workers>
|
||||
--project=<project>
|
||||
--targetDataset=<target BQ dataset>
|
||||
--workerMachineType=<worker machine type>
|
||||
--workingDir=<working dir>
|
||||
--cluster=<cluster to deploy job to>"
|
||||
}
|
||||
|
||||
# List of required parameters
|
||||
|
|
|
@ -10,13 +10,20 @@ DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" > /dev/null && pwd)"
|
|||
branch=master
|
||||
|
||||
usage() {
|
||||
echo "Usage: update_index.sh --branch=<base branch> --appDir=<ksonnet app dir>
|
||||
--gitRepo=<github repo with Argo CD hooked up> --env=<ksonnet environment> --indexFile=<index file>
|
||||
--lookupFile=<lookup file> --workflowId=<workflow id invoking the container>"
|
||||
echo "Usage: update_index.sh
|
||||
--baseGitRepo=<base git repo name>
|
||||
--baseBranch=<base branch>
|
||||
--appDir=<ksonnet app dir>
|
||||
--forkGitRepo=<github repo with Argo CD hooked up>
|
||||
--env=<ksonnet environment>
|
||||
--indexFile=<index file>
|
||||
--lookupFile=<lookup file>
|
||||
--workflowId=<workflow id invoking the container>
|
||||
--botEmail=<email account of the bot that send the PR>"
|
||||
}
|
||||
|
||||
# List of required parameters
|
||||
names=(appDir gitRepo env lookupFile indexFile workflowId)
|
||||
names=(baseGitRepo baseBranch appDir forkGitRepo env indexFile lookupFile workflowId botEmail)
|
||||
|
||||
source "${DIR}/parse_arguments.sh"
|
||||
|
||||
|
@ -29,9 +36,13 @@ if [ -z ${dryrun} ]; then
|
|||
fi
|
||||
|
||||
|
||||
git config --global user.email pipeline@localhost
|
||||
git clone -b ${branch} https://${GITHUB_TOKEN}@github.com/${gitRepo}.git repo && cd repo/${appDir}
|
||||
git config --global user.email ${botEmail}
|
||||
git clone https://${GITHUB_TOKEN}@github.com/${forkGitRepo}.git repo && cd repo/${appDir}
|
||||
git config credential.helper store
|
||||
git remote add upstream https://github.com/${baseGitRepo}.git
|
||||
git fetch upstream
|
||||
git merge upstream/${baseBranch} master
|
||||
|
||||
git checkout -b ${workflowId}
|
||||
ks param set --env=${env} search-index-server indexFile ${indexFile}
|
||||
ks param set --env=${env} search-index-server lookupFile ${lookupFile}
|
||||
|
@ -51,7 +62,7 @@ EOF
|
|||
# Create a pull request
|
||||
if (! ${dryrun}); then
|
||||
git push origin ${workflowId}
|
||||
hub pull-request --base=${gitRepo}:${branch} -F ${FILE}
|
||||
hub pull-request --base=${baseGitRepo}:${baseBranch} -F ${FILE}
|
||||
else
|
||||
echo "dry run; not committing to git."
|
||||
fi
|
||||
|
|
|
@ -64,7 +64,7 @@ def dataflow_function_embedding_op(
|
|||
num_workers: int, working_dir: str, step_name='dataflow_function_embedding'):
|
||||
return default_gcp_op(
|
||||
name=step_name,
|
||||
image='gcr.io/kubeflow-examples/code-search-ks:v20181127-08f8c05-dirty-19ca4c',
|
||||
image='gcr.io/kubeflow-examples/code-search/ks:v20181130-b807843',
|
||||
command=['/usr/local/src/submit_code_embeddings_job.sh'],
|
||||
arguments=[
|
||||
"--workflowId=%s" % workflow_id,
|
||||
|
@ -81,14 +81,16 @@ def dataflow_function_embedding_op(
|
|||
|
||||
|
||||
def search_index_creator_op(
|
||||
working_dir: str, data_dir: str, workflow_id: str, cluster_name: str, namespace: str):
|
||||
index_file: str, lookup_file: str, data_dir: str,
|
||||
workflow_id: str, cluster_name: str, namespace: str):
|
||||
return dsl.ContainerOp(
|
||||
# use component name as step name
|
||||
name='search_index_creator',
|
||||
image='gcr.io/kubeflow-examples/code-search-ks:v20181127-08f8c05-dirty-19ca4c',
|
||||
image='gcr.io/kubeflow-examples/code-search/ks:v20181130-b807843',
|
||||
command=['/usr/local/src/launch_search_index_creator_job.sh'],
|
||||
arguments=[
|
||||
'--workingDir=%s' % working_dir,
|
||||
'--indexFile=%s' % index_file,
|
||||
'--lookupFile=%s' % lookup_file,
|
||||
'--dataDir=%s' % data_dir,
|
||||
'--workflowId=%s' % workflow_id,
|
||||
'--cluster=%s' % cluster_name,
|
||||
|
@ -97,29 +99,85 @@ def search_index_creator_op(
|
|||
)
|
||||
|
||||
|
||||
def update_index_op(
    base_git_repo: str, base_branch: str, app_dir: str, fork_git_repo: str,
    index_file: str, lookup_file: str, workflow_id: str, bot_email: str):
    """Build the pipeline step that runs ``update_index.sh``.

    Every argument is forwarded to the script as a ``--flag=value`` option;
    the ``--env`` option is always ``pipeline``. The step also receives a
    volume backed by the ``github-access-token`` secret and a
    ``GITHUB_TOKEN`` environment variable read from that secret's ``token``
    key.

    Returns:
        Whatever the final ``add_env_variable`` call returns (presumably the
        configured ``dsl.ContainerOp`` -- confirm against the kfp SDK).
    """
    # Command-line options handed to update_index.sh, in the original order.
    script_args = [
        '--baseGitRepo=%s' % base_git_repo,
        '--baseBranch=%s' % base_branch,
        '--appDir=%s' % app_dir,
        '--forkGitRepo=%s' % fork_git_repo,
        '--env=%s' % 'pipeline',
        '--indexFile=%s' % index_file,
        '--lookupFile=%s' % lookup_file,
        '--workflowId=%s' % workflow_id,
        '--botEmail=%s' % bot_email,
    ]
    step = dsl.ContainerOp(
        name='update_index',
        image='gcr.io/kubeflow-examples/code-search/ks:v20181130-b807843',
        command=['/usr/local/src/update_index.sh'],
        arguments=script_args,
    )

    # Attach a volume sourced from the GitHub token secret.
    token_volume = k8s_client.V1Volume(
        name='github-access-token',
        secret=k8s_client.V1SecretVolumeSource(
            secret_name='github-access-token'
        ),
    )
    step = step.add_volume(token_volume)

    # Expose the secret's 'token' key to the container as GITHUB_TOKEN.
    token_selector = k8s_client.V1SecretKeySelector(
        name='github-access-token',
        key='token',
    )
    token_env = k8s_client.V1EnvVar(
        name='GITHUB_TOKEN',
        value_from=k8s_client.V1EnvVarSource(secret_key_ref=token_selector),
    )
    return step.add_env_variable(token_env)
|
||||
|
||||
|
||||
# The pipeline definition
|
||||
@dsl.pipeline(
|
||||
name='function_embedding',
|
||||
description='Example function embedding pipeline'
|
||||
)
|
||||
def function_embedding_update(
|
||||
project,
|
||||
working_dir,
|
||||
saved_model_dir,
|
||||
cluster_name,
|
||||
namespace,
|
||||
target_dataset=dsl.PipelineParam(name='target-dataset', value='code_search'),
|
||||
worker_machine_type=dsl.PipelineParam(name='worker-machine-type', value='n1-highcpu-32'),
|
||||
num_workers=dsl.PipelineParam(name='num-workers', value=5)):
|
||||
project='code-search-demo',
|
||||
cluster_name='cs-demo-1103',
|
||||
namespace='kubeflow',
|
||||
working_dir='gs://code-search-demo/pipeline',
|
||||
data_dir='gs://code-search-demo/20181104/data',
|
||||
saved_model_dir='gs://code-search-demo/models/20181107-dist-sync-gpu/export/1541712907/',
|
||||
target_dataset='code_search',
|
||||
worker_machine_type='n1-highcpu-32',
|
||||
function_embedding_num_workers=5,
|
||||
base_git_repo='kubeflow/examples',
|
||||
base_branch='master',
|
||||
app_dir='code_search/ks-web-app',
|
||||
fork_git_repo='IronPan/examples',
|
||||
bot_email='kf.sample.bot@gmail.com'):
|
||||
workflow_name = '{{workflow.name}}'
|
||||
working_dir = '%s/%s' % (working_dir, workflow_name)
|
||||
data_dir = '%s/data' % working_dir
|
||||
lookup_file = '%s/code-embeddings-index/embedding-to-info.csv' % working_dir
|
||||
index_file = '%s/code-embeddings-index/embeddings.index'% working_dir
|
||||
|
||||
function_embedding = dataflow_function_embedding_op(
|
||||
project, cluster_name, target_dataset, data_dir,
|
||||
saved_model_dir,
|
||||
workflow_name, worker_machine_type, num_workers, working_dir)
|
||||
search_index_creator_op(
|
||||
working_dir, data_dir, workflow_name, cluster_name, namespace).after(function_embedding)
|
||||
saved_model_dir, workflow_name, worker_machine_type,
|
||||
function_embedding_num_workers, working_dir)
|
||||
|
||||
search_index_creator = search_index_creator_op(
|
||||
index_file, lookup_file, data_dir, workflow_name, cluster_name, namespace)
|
||||
search_index_creator.after(function_embedding)
|
||||
update_index_op(
|
||||
base_git_repo, base_branch, app_dir, fork_git_repo,
|
||||
index_file, lookup_file, workflow_name, bot_email)\
|
||||
.after(search_index_creator)
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
|
|
Loading…
Reference in New Issue