[Samples] Change the data url to point to ml-pipeline instead of ml-pipeline-playground (#3890)

* update location in tfx sample

* update xgboost

* update the rest

* update notebook sample
This commit is contained in:
Jiaxiao Zheng 2020-06-01 22:30:15 -07:00 committed by GitHub
parent a7be049b6d
commit 88ee54fa32
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
9 changed files with 25 additions and 22 deletions

View File

@ -147,9 +147,9 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.7"
"version": "3.7.4"
}
},
"nbformat": 4,
"nbformat_minor": 2
}
}

View File

@ -309,10 +309,10 @@
" description='Dataflow launch python pipeline'\n",
")\n",
"def pipeline(\n",
" python_file_path = 'gs://ml-pipeline-playground/samples/dataflow/wc/wc.py',\n",
" python_file_path = 'gs://ml-pipeline/sample-pipeline/word-count/wc.py',\n",
" project_id = project,\n",
" staging_dir = output,\n",
" requirements_file_path = 'gs://ml-pipeline-playground/samples/dataflow/wc/requirements.txt',\n",
" requirements_file_path = 'gs://ml-pipeline/sample-pipeline/word-count/requirements.txt',\n",
" args = json.dumps([\n",
" '--output', output_file\n",
" ]),\n",
@ -412,7 +412,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.8"
"version": "3.7.4"
},
"pycharm": {
"stem_cell": {

View File

@ -43,7 +43,7 @@ def echo_op(text):
name='Exit Handler',
description='Downloads a message and prints it. The exit handler will run after the pipeline finishes (successfully or not).'
)
def download_and_print(url='gs://ml-pipeline-playground/shakespeare1.txt'):
def download_and_print(url='gs://ml-pipeline/shakespeare/shakespeare1.txt'):
"""A sample pipeline showing exit handler."""
exit_task = echo_op('exit!')

View File

@ -53,17 +53,18 @@ _pipeline_name = 'iris_native_keras'
# utility function is in iris_utils.py. Feel free to customize as needed.
_data_root_param = data_types.RuntimeParameter(
name='data-root',
default='gs://ml-pipeline-playground/iris/data',
default='gs://ml-pipeline/sample-data/iris/data',
ptype=Text,
)
# Python module file to inject customized logic into the TFX components. The
# Transform and Trainer both require user-defined functions to run successfully.
# This file is forked from https://github.com/tensorflow/tfx/blob/master/tfx/examples/iris/iris_utils_native_keras.py
# and baked into the TFX image used in the pipeline.
_module_file_param = data_types.RuntimeParameter(
name='module-file',
default=
'gs://ml-pipeline-playground/iris/modules/iris_utils_native_keras.py',
'/tfx-src/tfx/examples/iris/iris_utils_native_keras.py',
ptype=Text,
)

View File

@ -43,8 +43,8 @@ def echo2_op(text1, text2):
description='Download two messages in parallel and prints the concatenated result.'
)
def download_and_join(
url1='gs://ml-pipeline-playground/shakespeare1.txt',
url2='gs://ml-pipeline-playground/shakespeare2.txt'
url1='gs://ml-pipeline/sample-data/shakespeare/shakespeare1.txt',
url2='gs://ml-pipeline/sample-data/shakespeare/shakespeare2.txt'
):
"""A three-step pipeline with first two running in parallel."""

View File

@ -35,17 +35,18 @@ from tfx.proto import pusher_pb2
from tfx.proto import trainer_pb2
# Define pipeline params used for pipeline execution.
# Path to the module file, should be a GCS path.
# Path to the module file, should be a GCS path,
# or a module file baked in the docker image used by the pipeline.
_taxi_module_file_param = data_types.RuntimeParameter(
name='module-file',
default='gs://ml-pipeline-playground/tfx_taxi_simple/modules/taxi_utils.py',
default='/tfx-src/tfx/examples/chicago_taxi_pipeline/taxi_utils.py',
ptype=Text,
)
# Path to the CSV data file, under which there should be a data.csv file.
_data_root_param = data_types.RuntimeParameter(
name='data-root',
default='gs://ml-pipeline-playground/tfx_taxi_simple/data',
default='gs://ml-pipeline/sample-data/chicago-taxi/data',
ptype=Text,
)

View File

@ -53,7 +53,8 @@ for bucket in buckets:
name='Secret pipeline',
description='A pipeline to demonstrate mounting and use of secrets.'
)
def secret_op_pipeline(url='gs://ml-pipeline-playground/shakespeare1.txt'):
def secret_op_pipeline(
url='gs://ml-pipeline/sample-data/shakespeare/shakespeare1.txt'):
"""A pipeline that uses secret to access cloud hosted resouces."""
gcs_read_task = gcs_read_op(url)

View File

@ -42,7 +42,7 @@ def echo_op(text):
name='Sequential pipeline',
description='A pipeline with two sequential steps.'
)
def sequential_pipeline(url='gs://ml-pipeline-playground/shakespeare1.txt'):
def sequential_pipeline(url='gs://ml-pipeline/sample-data/shakespeare/shakespeare1.txt'):
"""A pipeline with two sequential steps."""
download_task = gcs_download_op(url)

View File

@ -42,9 +42,9 @@ dataproc_submit_spark_op = components.load_component_from_url(
'https://raw.githubusercontent.com/kubeflow/pipelines/01a23ae8672d3b18e88adf3036071496aca3552d/components/gcp/dataproc/submit_spark_job/component.yaml'
)
_PYSRC_PREFIX = 'gs://ml-pipeline-playground/dataproc-example' # Common path to python src.
_PYSRC_PREFIX = 'gs://ml-pipeline/sample-pipeline/xgboost' # Common path to python src.
_XGBOOST_PKG = 'gs://ml-pipeline-playground/xgboost4j-example-0.8-SNAPSHOT-jar-with-dependencies.jar'
_XGBOOST_PKG = 'gs://ml-pipeline/sample-pipeline/xgboost/xgboost4j-example-0.8-SNAPSHOT-jar-with-dependencies.jar'
_TRAINER_MAIN_CLS = 'ml.dmlc.xgboost4j.scala.example.spark.XGBoostTrainer'
@ -151,9 +151,9 @@ def dataproc_train_op(
):
if is_classification:
config='gs://ml-pipeline-playground/trainconfcla.json'
config='gs://ml-pipeline/sample-data/xgboost-config/trainconfcla.json'
else:
config='gs://ml-pipeline-playground/trainconfreg.json'
config='gs://ml-pipeline/sample-data/xgboost-config/trainconfreg.json'
return dataproc_submit_spark_op(
project_id=project,
@ -214,9 +214,9 @@ def xgb_train_pipeline(
region='us-central1'
workers=2
quota_check=[{'region':region,'metric':'CPUS','quota_needed':12.0}]
train_data='gs://ml-pipeline-playground/sfpd/train.csv'
eval_data='gs://ml-pipeline-playground/sfpd/eval.csv'
schema='gs://ml-pipeline-playground/sfpd/schema.json'
train_data='gs://ml-pipeline/sample-data/sfpd/train.csv'
eval_data='gs://ml-pipeline/sample-data/sfpd/eval.csv'
schema='gs://ml-pipeline/sample-data/sfpd/schema.json'
true_label='ACTION'
target='resolution'
required_apis='dataproc.googleapis.com'