[Samples] Change the data url to point to ml-pipeline instead of ml-pipeline-playground (#3890)

* update location in tfx sample

* update xgboost

* update the rest

* update notebook sample
This commit is contained in:
Jiaxiao Zheng 2020-06-01 22:30:15 -07:00 committed by GitHub
parent a7be049b6d
commit 88ee54fa32
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
9 changed files with 25 additions and 22 deletions

View File

@ -147,9 +147,9 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.7"
"version": "3.7.4"
}
},
"nbformat": 4,
"nbformat_minor": 2
}
}

View File

@ -309,10 +309,10 @@
" description='Dataflow launch python pipeline'\n",
")\n",
"def pipeline(\n",
" python_file_path = 'gs://ml-pipeline-playground/samples/dataflow/wc/wc.py',\n",
" python_file_path = 'gs://ml-pipeline/sample-pipeline/word-count/wc.py',\n",
" project_id = project,\n",
" staging_dir = output,\n",
" requirements_file_path = 'gs://ml-pipeline-playground/samples/dataflow/wc/requirements.txt',\n",
" requirements_file_path = 'gs://ml-pipeline/sample-pipeline/word-count/requirements.txt',\n",
" args = json.dumps([\n",
" '--output', output_file\n",
" ]),\n",
@ -412,7 +412,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.8"
"version": "3.7.4"
},
"pycharm": {
"stem_cell": {

View File

@ -43,7 +43,7 @@ def echo_op(text):
name='Exit Handler',
description='Downloads a message and prints it. The exit handler will run after the pipeline finishes (successfully or not).'
)
def download_and_print(url='gs://ml-pipeline-playground/shakespeare1.txt'):
def download_and_print(url='gs://ml-pipeline/shakespeare/shakespeare1.txt'):
"""A sample pipeline showing exit handler."""
exit_task = echo_op('exit!')

View File

@ -53,17 +53,18 @@ _pipeline_name = 'iris_native_keras'
# utility function is in iris_utils.py. Feel free to customize as needed.
_data_root_param = data_types.RuntimeParameter(
name='data-root',
default='gs://ml-pipeline-playground/iris/data',
default='gs://ml-pipeline/sample-data/iris/data',
ptype=Text,
)
# Python module file to inject customized logic into the TFX components. The
# Transform and Trainer both require user-defined functions to run successfully.
# This file is forked from https://github.com/tensorflow/tfx/blob/master/tfx/examples/iris/iris_utils_native_keras.py
# and baked into the TFX image used in the pipeline.
_module_file_param = data_types.RuntimeParameter(
name='module-file',
default=
'gs://ml-pipeline-playground/iris/modules/iris_utils_native_keras.py',
'/tfx-src/tfx/examples/iris/iris_utils_native_keras.py',
ptype=Text,
)

View File

@ -43,8 +43,8 @@ def echo2_op(text1, text2):
description='Download two messages in parallel and prints the concatenated result.'
)
def download_and_join(
url1='gs://ml-pipeline-playground/shakespeare1.txt',
url2='gs://ml-pipeline-playground/shakespeare2.txt'
url1='gs://ml-pipeline/sample-data/shakespeare/shakespeare1.txt',
url2='gs://ml-pipeline/sample-data/shakespeare/shakespeare2.txt'
):
"""A three-step pipeline with first two running in parallel."""

View File

@ -35,17 +35,18 @@ from tfx.proto import pusher_pb2
from tfx.proto import trainer_pb2
# Define pipeline params used for pipeline execution.
# Path to the module file, should be a GCS path.
# Path to the module file, should be a GCS path,
# or a module file baked in the docker image used by the pipeline.
_taxi_module_file_param = data_types.RuntimeParameter(
name='module-file',
default='gs://ml-pipeline-playground/tfx_taxi_simple/modules/taxi_utils.py',
default='/tfx-src/tfx/examples/chicago_taxi_pipeline/taxi_utils.py',
ptype=Text,
)
# Path to the CSV data file, under which there should be a data.csv file.
_data_root_param = data_types.RuntimeParameter(
name='data-root',
default='gs://ml-pipeline-playground/tfx_taxi_simple/data',
default='gs://ml-pipeline/sample-data/chicago-taxi/data',
ptype=Text,
)

View File

@ -53,7 +53,8 @@ for bucket in buckets:
name='Secret pipeline',
description='A pipeline to demonstrate mounting and use of secrets.'
)
def secret_op_pipeline(url='gs://ml-pipeline-playground/shakespeare1.txt'):
def secret_op_pipeline(
url='gs://ml-pipeline/sample-data/shakespeare/shakespeare1.txt'):
"""A pipeline that uses secret to access cloud hosted resouces."""
gcs_read_task = gcs_read_op(url)

View File

@ -42,7 +42,7 @@ def echo_op(text):
name='Sequential pipeline',
description='A pipeline with two sequential steps.'
)
def sequential_pipeline(url='gs://ml-pipeline-playground/shakespeare1.txt'):
def sequential_pipeline(url='gs://ml-pipeline/sample-data/shakespeare/shakespeare1.txt'):
"""A pipeline with two sequential steps."""
download_task = gcs_download_op(url)

View File

@ -42,9 +42,9 @@ dataproc_submit_spark_op = components.load_component_from_url(
'https://raw.githubusercontent.com/kubeflow/pipelines/01a23ae8672d3b18e88adf3036071496aca3552d/components/gcp/dataproc/submit_spark_job/component.yaml'
)
_PYSRC_PREFIX = 'gs://ml-pipeline-playground/dataproc-example' # Common path to python src.
_PYSRC_PREFIX = 'gs://ml-pipeline/sample-pipeline/xgboost' # Common path to python src.
_XGBOOST_PKG = 'gs://ml-pipeline-playground/xgboost4j-example-0.8-SNAPSHOT-jar-with-dependencies.jar'
_XGBOOST_PKG = 'gs://ml-pipeline/sample-pipeline/xgboost/xgboost4j-example-0.8-SNAPSHOT-jar-with-dependencies.jar'
_TRAINER_MAIN_CLS = 'ml.dmlc.xgboost4j.scala.example.spark.XGBoostTrainer'
@ -151,9 +151,9 @@ def dataproc_train_op(
):
if is_classification:
config='gs://ml-pipeline-playground/trainconfcla.json'
config='gs://ml-pipeline/sample-data/xgboost-config/trainconfcla.json'
else:
config='gs://ml-pipeline-playground/trainconfreg.json'
config='gs://ml-pipeline/sample-data/xgboost-config/trainconfreg.json'
return dataproc_submit_spark_op(
project_id=project,
@ -214,9 +214,9 @@ def xgb_train_pipeline(
region='us-central1'
workers=2
quota_check=[{'region':region,'metric':'CPUS','quota_needed':12.0}]
train_data='gs://ml-pipeline-playground/sfpd/train.csv'
eval_data='gs://ml-pipeline-playground/sfpd/eval.csv'
schema='gs://ml-pipeline-playground/sfpd/schema.json'
train_data='gs://ml-pipeline/sample-data/sfpd/train.csv'
eval_data='gs://ml-pipeline/sample-data/sfpd/eval.csv'
schema='gs://ml-pipeline/sample-data/sfpd/schema.json'
true_label='ACTION'
target='resolution'
required_apis='dataproc.googleapis.com'