[Samples] Change the data url to point to ml-pipeline instead of ml-pipeline-playground (#3890)
* update location in tfx sample * update xgboost * update the rest * update notebook sample
This commit is contained in:
parent
a7be049b6d
commit
88ee54fa32
|
|
@ -147,9 +147,9 @@
|
|||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.6.7"
|
||||
"version": "3.7.4"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 2
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -309,10 +309,10 @@
|
|||
" description='Dataflow launch python pipeline'\n",
|
||||
")\n",
|
||||
"def pipeline(\n",
|
||||
" python_file_path = 'gs://ml-pipeline-playground/samples/dataflow/wc/wc.py',\n",
|
||||
" python_file_path = 'gs://ml-pipeline/sample-pipeline/word-count/wc.py',\n",
|
||||
" project_id = project,\n",
|
||||
" staging_dir = output,\n",
|
||||
" requirements_file_path = 'gs://ml-pipeline-playground/samples/dataflow/wc/requirements.txt',\n",
|
||||
" requirements_file_path = 'gs://ml-pipeline/sample-pipeline/word-count/requirements.txt',\n",
|
||||
" args = json.dumps([\n",
|
||||
" '--output', output_file\n",
|
||||
" ]),\n",
|
||||
|
|
@ -412,7 +412,7 @@
|
|||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.6.8"
|
||||
"version": "3.7.4"
|
||||
},
|
||||
"pycharm": {
|
||||
"stem_cell": {
|
||||
|
|
|
|||
|
|
@ -43,7 +43,7 @@ def echo_op(text):
|
|||
name='Exit Handler',
|
||||
description='Downloads a message and prints it. The exit handler will run after the pipeline finishes (successfully or not).'
|
||||
)
|
||||
def download_and_print(url='gs://ml-pipeline-playground/shakespeare1.txt'):
|
||||
def download_and_print(url='gs://ml-pipeline/shakespeare/shakespeare1.txt'):
|
||||
"""A sample pipeline showing exit handler."""
|
||||
|
||||
exit_task = echo_op('exit!')
|
||||
|
|
|
|||
|
|
@ -53,17 +53,18 @@ _pipeline_name = 'iris_native_keras'
|
|||
# utility function is in iris_utils.py. Feel free to customize as needed.
|
||||
_data_root_param = data_types.RuntimeParameter(
|
||||
name='data-root',
|
||||
default='gs://ml-pipeline-playground/iris/data',
|
||||
default='gs://ml-pipeline/sample-data/iris/data',
|
||||
ptype=Text,
|
||||
)
|
||||
|
||||
# Python module file to inject customized logic into the TFX components. The
|
||||
# Transform and Trainer both require user-defined functions to run successfully.
|
||||
# This file is forked from https://github.com/tensorflow/tfx/blob/master/tfx/examples/iris/iris_utils_native_keras.py
|
||||
# and baked into the TFX image used in the pipeline.
|
||||
_module_file_param = data_types.RuntimeParameter(
|
||||
name='module-file',
|
||||
default=
|
||||
'gs://ml-pipeline-playground/iris/modules/iris_utils_native_keras.py',
|
||||
'/tfx-src/tfx/examples/iris/iris_utils_native_keras.py',
|
||||
ptype=Text,
|
||||
)
|
||||
|
||||
|
|
|
|||
|
|
@ -43,8 +43,8 @@ def echo2_op(text1, text2):
|
|||
description='Download two messages in parallel and prints the concatenated result.'
|
||||
)
|
||||
def download_and_join(
|
||||
url1='gs://ml-pipeline-playground/shakespeare1.txt',
|
||||
url2='gs://ml-pipeline-playground/shakespeare2.txt'
|
||||
url1='gs://ml-pipeline/sample-data/shakespeare/shakespeare1.txt',
|
||||
url2='gs://ml-pipeline/sample-data/shakespeare/shakespeare2.txt'
|
||||
):
|
||||
"""A three-step pipeline with first two running in parallel."""
|
||||
|
||||
|
|
|
|||
|
|
@ -35,17 +35,18 @@ from tfx.proto import pusher_pb2
|
|||
from tfx.proto import trainer_pb2
|
||||
|
||||
# Define pipeline params used for pipeline execution.
|
||||
# Path to the module file, should be a GCS path.
|
||||
# Path to the module file, should be a GCS path,
|
||||
# or a module file baked in the docker image used by the pipeline.
|
||||
_taxi_module_file_param = data_types.RuntimeParameter(
|
||||
name='module-file',
|
||||
default='gs://ml-pipeline-playground/tfx_taxi_simple/modules/taxi_utils.py',
|
||||
default='/tfx-src/tfx/examples/chicago_taxi_pipeline/taxi_utils.py',
|
||||
ptype=Text,
|
||||
)
|
||||
|
||||
# Path to the CSV data file, under which there should be a data.csv file.
|
||||
_data_root_param = data_types.RuntimeParameter(
|
||||
name='data-root',
|
||||
default='gs://ml-pipeline-playground/tfx_taxi_simple/data',
|
||||
default='gs://ml-pipeline/sample-data/chicago-taxi/data',
|
||||
ptype=Text,
|
||||
)
|
||||
|
||||
|
|
|
|||
|
|
@ -53,7 +53,8 @@ for bucket in buckets:
|
|||
name='Secret pipeline',
|
||||
description='A pipeline to demonstrate mounting and use of secretes.'
|
||||
)
|
||||
def secret_op_pipeline(url='gs://ml-pipeline-playground/shakespeare1.txt'):
|
||||
def secret_op_pipeline(
|
||||
url='gs://ml-pipeline/sample-data/shakespeare/shakespeare1.txt'):
|
||||
"""A pipeline that uses secret to access cloud hosted resouces."""
|
||||
|
||||
gcs_read_task = gcs_read_op(url)
|
||||
|
|
|
|||
|
|
@ -42,7 +42,7 @@ def echo_op(text):
|
|||
name='Sequential pipeline',
|
||||
description='A pipeline with two sequential steps.'
|
||||
)
|
||||
def sequential_pipeline(url='gs://ml-pipeline-playground/shakespeare1.txt'):
|
||||
def sequential_pipeline(url='gs://ml-pipeline/sample-data/shakespeare/shakespeare1.txt'):
|
||||
"""A pipeline with two sequential steps."""
|
||||
|
||||
download_task = gcs_download_op(url)
|
||||
|
|
|
|||
|
|
@ -42,9 +42,9 @@ dataproc_submit_spark_op = components.load_component_from_url(
|
|||
'https://raw.githubusercontent.com/kubeflow/pipelines/01a23ae8672d3b18e88adf3036071496aca3552d/components/gcp/dataproc/submit_spark_job/component.yaml'
|
||||
)
|
||||
|
||||
_PYSRC_PREFIX = 'gs://ml-pipeline-playground/dataproc-example' # Common path to python src.
|
||||
_PYSRC_PREFIX = 'gs://ml-pipeline/sample-pipeline/xgboost' # Common path to python src.
|
||||
|
||||
_XGBOOST_PKG = 'gs://ml-pipeline-playground/xgboost4j-example-0.8-SNAPSHOT-jar-with-dependencies.jar'
|
||||
_XGBOOST_PKG = 'gs://ml-pipeline/sample-pipeline/xgboost/xgboost4j-example-0.8-SNAPSHOT-jar-with-dependencies.jar'
|
||||
|
||||
_TRAINER_MAIN_CLS = 'ml.dmlc.xgboost4j.scala.example.spark.XGBoostTrainer'
|
||||
|
||||
|
|
@ -151,9 +151,9 @@ def dataproc_train_op(
|
|||
):
|
||||
|
||||
if is_classification:
|
||||
config='gs://ml-pipeline-playground/trainconfcla.json'
|
||||
config='gs://ml-pipeline/sample-data/xgboost-config/trainconfcla.json'
|
||||
else:
|
||||
config='gs://ml-pipeline-playground/trainconfreg.json'
|
||||
config='gs://ml-pipeline/sample-data/xgboost-config/trainconfreg.json'
|
||||
|
||||
return dataproc_submit_spark_op(
|
||||
project_id=project,
|
||||
|
|
@ -214,9 +214,9 @@ def xgb_train_pipeline(
|
|||
region='us-central1'
|
||||
workers=2
|
||||
quota_check=[{'region':region,'metric':'CPUS','quota_needed':12.0}]
|
||||
train_data='gs://ml-pipeline-playground/sfpd/train.csv'
|
||||
eval_data='gs://ml-pipeline-playground/sfpd/eval.csv'
|
||||
schema='gs://ml-pipeline-playground/sfpd/schema.json'
|
||||
train_data='gs://ml-pipeline/sample-data/sfpd/train.csv'
|
||||
eval_data='gs://ml-pipeline/sample-data/sfpd/eval.csv'
|
||||
schema='gs://ml-pipeline/sample-data/sfpd/schema.json'
|
||||
true_label='ACTION'
|
||||
target='resolution'
|
||||
required_apis='dataproc.googleapis.com'
|
||||
|
|
|
|||
Loading…
Reference in New Issue