mirror of https://github.com/kubeflow/examples.git
GIS KFP example: kfp-related syntax changes; use gcs client libs instead of gsutil (#749)
* Some modifications to accommodate (perhaps temporary) changes in how the KFP SDK works. * Use the GCS client libraries rather than gsutil for a GCS copy; required due to changes in node service account permissions. * More modifications to address KFP syntax changes.
This commit is contained in:
parent
b218d2b23c
commit
fd7a98fed1
|
@ -39,6 +39,16 @@ def copy_local_directory_to_gcs(project, local_path, bucket_name, gcs_path):
|
|||
blob = bucket.blob(remote_path)
|
||||
blob.upload_from_filename(local_file)
|
||||
|
||||
def download_blob(bucket_name, source_blob_name, destination_file_name):
    """Fetch a single object from a bucket and save it to a local file.

    Args:
      bucket_name: name of the bucket that holds the object.
      source_blob_name: path of the object inside the bucket.
      destination_file_name: local file path the contents are written to.
    """
    # NOTE(review): `storage` is expected to be imported at file level; the
    # import is not visible in this excerpt -- confirm.
    # The client is created with no arguments, exactly as before.
    target_blob = storage.Client().bucket(bucket_name).blob(source_blob_name)
    target_blob.download_to_filename(destination_file_name)

    # Report what was copied and where it landed.
    message = "Blob {} downloaded to {}.".format(
        source_blob_name, destination_file_name)
    print(message)
|
||||
|
||||
def main():
|
||||
parser = argparse.ArgumentParser(description='ML Trainer')
|
||||
parser.add_argument(
|
||||
|
@ -57,13 +67,8 @@ def main():
|
|||
local_data_dir = '/ml/t2t_gh_data'
|
||||
local_source_data_file = '/ml/gh_data/github_issues.csv'
|
||||
|
||||
data_copy_command1 = ['gsutil', 'cp',
|
||||
'gs://aju-dev-demos-codelabs/kubecon/gh_data/github_issues.csv',
|
||||
local_source_data_file
|
||||
]
|
||||
print(data_copy_command1)
|
||||
result = subprocess.call(data_copy_command1)
|
||||
print(result)
|
||||
download_blob('aju-dev-demos-codelabs', 'kubecon/gh_data/github_issues.csv',
|
||||
local_source_data_file)
|
||||
|
||||
datagen_command = ['t2t-datagen', '--data_dir', local_data_dir, '--t2t_usr_dir',
|
||||
'/ml/ghsumm/trainer',
|
||||
|
|
|
@ -16,7 +16,7 @@
|
|||
import kfp.dsl as dsl
|
||||
import kfp.gcp as gcp
|
||||
import kfp.components as comp
|
||||
from kfp.dsl.types import GCSPath, String
|
||||
# from kfp.dsl.types import GCSPath, String
|
||||
|
||||
|
||||
COPY_ACTION = 'copy_data'
|
||||
|
@ -43,12 +43,12 @@ metadata_log_op = comp.load_component_from_url(
|
|||
)
|
||||
def gh_summ( #pylint: disable=unused-argument
|
||||
train_steps: 'Integer' = 2019300,
|
||||
project: String = 'YOUR_PROJECT_HERE',
|
||||
github_token: String = 'YOUR_GITHUB_TOKEN_HERE',
|
||||
working_dir: GCSPath = 'gs://YOUR_GCS_DIR_HERE',
|
||||
checkpoint_dir: GCSPath = 'gs://aju-dev-demos-codelabs/kubecon/model_output_tbase.bak2019000/',
|
||||
deploy_webapp: String = 'true',
|
||||
data_dir: GCSPath = 'gs://aju-dev-demos-codelabs/kubecon/t2t_data_gh_all/'
|
||||
project: str = 'YOUR_PROJECT_HERE',
|
||||
github_token: str = 'YOUR_GITHUB_TOKEN_HERE',
|
||||
working_dir: 'GCSPath' = 'gs://YOUR_GCS_DIR_HERE',
|
||||
checkpoint_dir: 'GCSPath' = 'gs://aju-dev-demos-codelabs/kubecon/model_output_tbase.bak2019000/',
|
||||
deploy_webapp: str = 'true',
|
||||
data_dir: 'GCSPath' = 'gs://aju-dev-demos-codelabs/kubecon/t2t_data_gh_all/'
|
||||
):
|
||||
|
||||
|
||||
|
|
Binary file not shown.
|
@ -15,7 +15,7 @@
|
|||
|
||||
import kfp.dsl as dsl
|
||||
import kfp.gcp as gcp
|
||||
from kfp.dsl.types import String
|
||||
# from kfp.dsl.types import String
|
||||
|
||||
|
||||
@dsl.pipeline(
|
||||
|
@ -23,7 +23,7 @@ from kfp.dsl.types import String
|
|||
description='Demonstrate Tensor2Tensor-based training and TF-Serving'
|
||||
)
|
||||
def gh_summ_serveonly(
|
||||
github_token: String = 'YOUR_GITHUB_TOKEN_HERE',
|
||||
github_token: str = 'YOUR_GITHUB_TOKEN_HERE',
|
||||
):
|
||||
|
||||
|
||||
|
|
Binary file not shown.
|
@ -173,12 +173,12 @@
|
|||
")\n",
|
||||
"def gh_summ( #pylint: disable=unused-argument\n",
|
||||
" train_steps: 'Integer' = 2019300,\n",
|
||||
" project: String = 'YOUR_PROJECT_HERE',\n",
|
||||
" github_token: String = 'YOUR_GITHUB_TOKEN_HERE',\n",
|
||||
" working_dir: GCSPath = 'YOUR_GCS_DIR_HERE',\n",
|
||||
" checkpoint_dir: GCSPath = 'gs://aju-dev-demos-codelabs/kubecon/model_output_tbase.bak2019000/',\n",
|
||||
" deploy_webapp: String = 'true',\n",
|
||||
" data_dir: GCSPath = 'gs://aju-dev-demos-codelabs/kubecon/t2t_data_gh_all/'\n",
|
||||
" project: str = 'YOUR_PROJECT_HERE',\n",
|
||||
" github_token: str = 'YOUR_GITHUB_TOKEN_HERE',\n",
|
||||
" working_dir: 'GCSPath' = 'YOUR_GCS_DIR_HERE',\n",
|
||||
" checkpoint_dir: 'GCSPath' = 'gs://aju-dev-demos-codelabs/kubecon/model_output_tbase.bak2019000/',\n",
|
||||
" deploy_webapp: str = 'true',\n",
|
||||
" data_dir: 'GCSPath' = 'gs://aju-dev-demos-codelabs/kubecon/t2t_data_gh_all/'\n",
|
||||
" ):\n",
|
||||
"\n",
|
||||
"\n",
|
||||
|
@ -266,8 +266,8 @@
|
|||
"source": [
|
||||
"# You'd uncomment this call to actually run the pipeline. \n",
|
||||
"# run = client.run_pipeline(exp.id, 'ghsumm', 'ghsumm.tar.gz',\n",
|
||||
"# params={'working-dir': WORKING_DIR,\n",
|
||||
"# 'github-token': GITHUB_TOKEN,\n",
|
||||
"# params={'working_dir': WORKING_DIR,\n",
|
||||
"# 'github_token': GITHUB_TOKEN,\n",
|
||||
"# 'project': PROJECT_NAME})"
|
||||
]
|
||||
},
|
||||
|
@ -329,12 +329,12 @@
|
|||
")\n",
|
||||
"def gh_summ2(\n",
|
||||
" train_steps: 'Integer' = 2019300,\n",
|
||||
" project: String = 'YOUR_PROJECT_HERE',\n",
|
||||
" github_token: String = 'YOUR_GITHUB_TOKEN_HERE',\n",
|
||||
" working_dir: GCSPath = 'YOUR_GCS_DIR_HERE',\n",
|
||||
" checkpoint_dir: GCSPath = 'gs://aju-dev-demos-codelabs/kubecon/model_output_tbase.bak2019000/',\n",
|
||||
" deploy_webapp: String = 'true',\n",
|
||||
" data_dir: GCSPath = 'gs://aju-dev-demos-codelabs/kubecon/t2t_data_gh_all/'\n",
|
||||
" project: str = 'YOUR_PROJECT_HERE',\n",
|
||||
" github_token: str = 'YOUR_GITHUB_TOKEN_HERE',\n",
|
||||
" working_dir: 'GCSPath' = 'YOUR_GCS_DIR_HERE',\n",
|
||||
" checkpoint_dir: 'GCSPath' = 'gs://aju-dev-demos-codelabs/kubecon/model_output_tbase.bak2019000/',\n",
|
||||
" deploy_webapp: str = 'true',\n",
|
||||
" data_dir: 'GCSPath' = 'gs://aju-dev-demos-codelabs/kubecon/t2t_data_gh_all/'\n",
|
||||
" ):\n",
|
||||
"\n",
|
||||
" # The new pre-processing op.\n",
|
||||
|
@ -418,9 +418,9 @@
|
|||
"outputs": [],
|
||||
"source": [
|
||||
"run = client.run_pipeline(exp.id, 'ghsumm2', 'ghsumm2.tar.gz',\n",
|
||||
" params={'working-dir': WORKING_DIR,\n",
|
||||
" 'github-token': GITHUB_TOKEN,\n",
|
||||
" 'deploy-webapp': DEPLOY_WEBAPP,\n",
|
||||
" params={'working_dir': WORKING_DIR,\n",
|
||||
" 'github_token': GITHUB_TOKEN,\n",
|
||||
" 'deploy_webapp': DEPLOY_WEBAPP,\n",
|
||||
" 'project': PROJECT_NAME})"
|
||||
]
|
||||
},
|
||||
|
|
Loading…
Reference in New Issue