fix(sdk): add default value for inputs (#7405)

* fix(sdk): add default value for inputs

* resolve merge conflict

* update release notes

* fix sample
Author: Yaqi Ji, 2022-03-23 11:59:20 -07:00 (committed by GitHub)
Parent: 09ca1c2ec2
Commit: 04123280a4
8 changed files with 524 additions and 448 deletions

View File

@@ -40,6 +40,7 @@ def verify(run: kfp_server_api.ApiRun, mlmd_connection_config, **kwargs):
{
'inputs': {
'parameters': {
'empty_message': '',
'message': 'message',
}
},
@@ -61,8 +62,8 @@ def verify(run: kfp_server_api.ApiRun, mlmd_connection_config, **kwargs):
'parameters': {
'output_bool_parameter_path': True,
'output_dict_parameter_path': {
"A": 1,
"B": 2
"A": 1.0,
"B": 2.0
},
'output_list_parameter_path': ["a", "b", "c"],
'output_parameter_path': 'message'
@@ -96,7 +97,8 @@ def verify(run: kfp_server_api.ApiRun, mlmd_connection_config, **kwargs):
"B": 2.0,
},
'input_list': ["a", "b", "c"],
'message': 'message'
'message': 'message',
'num_steps': 100.0
}
},
'name': 'train',
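A note on the hunks above: the expected dict values change from 1/2 to 1.0/2.0 (and the train task now records num_steps: 100.0 without the pipeline passing it) because parameter values round-trip through protobuf's struct Value, whose number_value field is a double. A minimal sketch of that behavior, assuming only the google.protobuf package:

from google.protobuf import json_format, struct_pb2

# Integers parsed into a protobuf Value come back as floats.
value = json_format.ParseDict({'A': 1, 'B': 2}, struct_pb2.Value())
print(json_format.MessageToDict(value))  # {'A': 1.0, 'B': 2.0}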

View File

@@ -83,6 +83,8 @@
* Depends on `typing-extensions>=3.7.4,<5; python_version<"3.9"` [\#7288](https://github.com/kubeflow/pipelines/pull/7288)
* Depends on `google-api-core>=1.31.5, >=2.3.2` [\#7377](https://github.com/kubeflow/pipelines/pull/7377)
* Fix bug that required KFP API server for `kfp components build` command to work [\#7430](https://github.com/kubeflow/pipelines/pull/7430)
* Pass default value for inputs and remove deprecated items in v1 [\#7405](https://github.com/kubeflow/pipelines/pull/7405)
## Documentation Updates
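To illustrate the "Pass default value for inputs" entry above: a default declared in a v1 component YAML now survives compilation into the v2 IR. A hedged sketch, assuming the components.load_component_from_text and compiler.Compiler APIs exercised by the tests later in this commit (the component and file names here are hypothetical):

from kfp import compiler, components, dsl

greet_op = components.load_component_from_text("""
name: greet
inputs:
- {name: location, type: String, default: 'us-central1'}
implementation:
  container:
    image: alpine
    command: [echo, {inputValue: location}]
""")

@dsl.pipeline(name='greet-pipeline')
def greet_pipeline():
    # No argument passed: the compiled spec keeps defaultValue 'us-central1'.
    greet_op()

compiler.Compiler().compile(
    pipeline_func=greet_pipeline, package_path='greet.json')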

View File

@@ -13,8 +13,7 @@
# limitations under the License.
"""KFP DSL compiler.
This is an experimental implementation of KFP compiler that compiles KFP
pipeline into Pipeline IR:
Implementation of KFP compiler that compiles KFP pipeline into Pipeline IR:
https://docs.google.com/document/d/1PUDuSQ8vmeKSBloli53mp7GIvzekaY7sggg6ywy35Dk/
"""
import collections

View File

@@ -727,6 +727,37 @@ class TestWriteToFileTypes(parameterized.TestCase):
finally:
shutil.rmtree(tmpdir)
def test_compile_pipeline_with_default_value(self):
tmpdir = tempfile.mkdtemp()
try:
producer_op = components.load_component_from_text("""
name: producer
inputs:
- {name: location, type: String, default: 'us-central1'}
- {name: name, type: Integer, default: 1}
- {name: noDefault, type: String}
implementation:
container:
image: gcr.io/my-project/my-image:tag
args:
- {inputValue: location}
""")
@dsl.pipeline(name='test-pipeline')
def simple_pipeline():
producer = producer_op(location="1")
target_json_file = os.path.join(tmpdir, 'result.json')
compiler.Compiler().compile(
pipeline_func=simple_pipeline, package_path=target_json_file)
self.assertTrue(os.path.exists(target_json_file))
with open(target_json_file, 'r') as f:
print(f.read())
pass
finally:
shutil.rmtree(tmpdir)
if __name__ == '__main__':
unittest.main()
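The new test prints the compiled JSON and only asserts that the output file exists. A sketch of a stronger check one could add inside the try block above (the comp-producer key and JSON paths are assumptions inferred from the compiled specs elsewhere in this commit):

import json

with open(target_json_file) as f:
    spec = json.load(f)
# Assumed layout: components / comp-<name> / inputDefinitions / parameters.
params = spec['components']['comp-producer']['inputDefinitions']['parameters']
assert params['location']['defaultValue'] == 'us-central1'
assert params['name']['defaultValue'] == 1.0  # integer defaults serialize as doubles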

View File

@@ -339,13 +339,17 @@ def build_component_spec_for_task(
continue
# skip inputs not present, as a workaround to support optional inputs.
if input_name not in task.inputs:
if input_name not in task.inputs and input_spec.default is None:
continue
if type_utils.is_parameter_type(input_spec.type):
component_spec.input_definitions.parameters[
input_name].parameter_type = type_utils.get_parameter_type(
input_spec.type)
if input_spec.default is not None:
component_spec.input_definitions.parameters[
input_name].default_value.CopyFrom(_to_protobuf_value(input_spec.default))
else:
component_spec.input_definitions.artifacts[
input_name].artifact_type.CopyFrom(
@@ -503,7 +507,6 @@ def build_component_spec_for_group(
input_name].parameter_type = type_utils.get_parameter_type(
channel.channel_type)
# TODO: should we fill in default value for all groups and tasks?
if is_root_group:
_fill_in_component_input_default_value(
component_spec=component_spec,
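For context, the new branch relies on a _to_protobuf_value helper to turn a plain Python default into a google.protobuf.struct_pb2.Value. A sketch of what such a helper must do (an illustration, not the committed implementation):

from google.protobuf import struct_pb2

def _to_protobuf_value(value) -> struct_pb2.Value:
    if isinstance(value, str):
        return struct_pb2.Value(string_value=value)
    if isinstance(value, bool):
        # bool before int/float: bool is a subclass of int in Python.
        return struct_pb2.Value(bool_value=value)
    if isinstance(value, (int, float)):
        # number_value is a double, which is why integer defaults
        # appear as 100.0 in the compiled specs below.
        return struct_pb2.Value(number_value=value)
    if isinstance(value, dict):
        return struct_pb2.Value(struct_value=struct_pb2.Struct(
            fields={k: _to_protobuf_value(v) for k, v in value.items()}))
    if isinstance(value, list):
        return struct_pb2.Value(list_value=struct_pb2.ListValue(
            values=[_to_protobuf_value(v) for v in value]))
    raise ValueError(f'Unsupported default value: {value!r}')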

View File

@@ -7,6 +7,11 @@ deploymentSpec:
exec-preprocess:
container:
image: python:3.7
args:
- --executor_input
- '{{$}}'
- --function_to_execute
- preprocess
command:
- sh
- -c
@@ -51,14 +56,13 @@ deploymentSpec:
\ 'w') as f:\n f.write(json.dumps(input_dict_parameter))\n\n with\
\ open(output_list_parameter_path, 'w') as f:\n f.write(json.dumps(input_list_parameter))\n\
\n"
exec-train:
container:
args:
- --executor_input
- '{{$}}'
- --function_to_execute
- preprocess
exec-train:
container:
image: python:3.7
- train
command:
- sh
- -c
@@ -101,40 +105,36 @@ deploymentSpec:
\ Model artifact, which has a .metadata dictionary\n # to store arbitrary\
\ metadata for the output artifact.\n model.metadata['accuracy'] = 0.9\n\
\n"
args:
- --executor_input
- '{{$}}'
- --function_to_execute
- train
image: python:3.7
components:
comp-preprocess:
inputDefinitions:
parameters:
message:
parameterType: STRING
input_dict_parameter:
parameterType: STRUCT
message:
parameterType: STRING
input_list_parameter:
parameterType: LIST
outputDefinitions:
artifacts:
output_dataset_one:
artifactType:
schemaTitle: system.Dataset
schemaVersion: 0.0.1
output_dataset_two_path:
artifactType:
schemaTitle: system.Dataset
schemaVersion: 0.0.1
output_dataset_one:
artifactType:
schemaTitle: system.Dataset
schemaVersion: 0.0.1
parameters:
output_parameter_path:
parameterType: STRING
output_bool_parameter_path:
parameterType: BOOLEAN
output_dict_parameter_path:
parameterType: STRUCT
output_list_parameter_path:
parameterType: LIST
output_parameter_path:
parameterType: STRING
output_dict_parameter_path:
parameterType: STRUCT
executorLabel: exec-preprocess
comp-train:
inputDefinitions:
@@ -148,12 +148,15 @@ components:
schemaTitle: system.Dataset
schemaVersion: 0.0.1
parameters:
message:
parameterType: STRING
input_bool:
parameterType: BOOLEAN
message:
parameterType: STRING
input_dict:
parameterType: STRUCT
num_steps:
parameterType: NUMBER_INTEGER
defaultValue: 100.0
input_list:
parameterType: LIST
outputDefinitions:
@@ -175,30 +178,15 @@ root:
B: 2.0
dag:
tasks:
preprocess:
taskInfo:
name: preprocess
inputs:
parameters:
message:
componentInputParameter: message
input_dict_parameter:
componentInputParameter: input_dict
input_list_parameter:
runtimeValue:
constant:
- a
- b
- c
cachingOptions:
enableCache: true
componentRef:
name: comp-preprocess
train:
taskInfo:
name: train
inputs:
parameters:
input_list:
taskOutputParameter:
producerTask: preprocess
outputParameterKey: output_list_parameter_path
message:
taskOutputParameter:
producerTask: preprocess
@@ -211,23 +199,38 @@ root:
taskOutputParameter:
producerTask: preprocess
outputParameterKey: output_dict_parameter_path
input_list:
taskOutputParameter:
producerTask: preprocess
outputParameterKey: output_list_parameter_path
artifacts:
dataset_one_path:
taskOutputArtifact:
producerTask: preprocess
outputArtifactKey: output_dataset_one
dataset_two:
taskOutputArtifact:
producerTask: preprocess
outputArtifactKey: output_dataset_two_path
dataset_one_path:
taskOutputArtifact:
producerTask: preprocess
outputArtifactKey: output_dataset_one
dependentTasks:
- preprocess
cachingOptions:
enableCache: true
componentRef:
name: comp-train
preprocess:
taskInfo:
name: preprocess
inputs:
parameters:
input_list_parameter:
runtimeValue:
constant:
- a
- b
- c
message:
componentInputParameter: message
input_dict_parameter:
componentInputParameter: input_dict
cachingOptions:
enableCache: true
componentRef:
name: comp-preprocess
defaultPipelineRoot: dummy_root
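In the spec above, num_steps now carries defaultValue: 100.0, and the sample-test expectations at the top of this commit show the executed train task recording num_steps: 100.0 even though the pipeline never passes it. A hypothetical component signature that would compile this way, assuming the v2 @dsl.component decorator:

from kfp import dsl

@dsl.component
def train(message: str, num_steps: int = 100):
    # Hypothetical body; the Python default of 100 becomes
    # defaultValue: 100.0 (NUMBER_INTEGER defaults serialize as doubles).
    for _ in range(num_steps):
        print(message)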

View File

@@ -6,7 +6,6 @@ deploymentSpec:
executors:
exec-component-op:
container:
image: python:3.7
command:
- sh
- -c
@@ -29,6 +28,7 @@ deploymentSpec:
\ {input1}, type: {type(input1)}')\n print(f'input2: {input2}, type:\
\ {type(input2)}')\n print(f'input3: {input3}, type: {type(input3)}')\n\
\n"
image: python:3.7
args:
- --executor_input
- '{{$}}'
@@ -38,10 +38,11 @@ components:
comp-component-op:
inputDefinitions:
parameters:
input1:
parameterType: STRING
input2:
parameterType: STRING
input1:
parameterType: STRING
defaultValue: default value
executorLabel: exec-component-op
root:
dag:
@@ -51,12 +52,12 @@ root:
name: component-op
inputs:
parameters:
input1:
runtimeValue:
constant: Hello
input2:
runtimeValue:
constant: World
input1:
runtimeValue:
constant: Hello
cachingOptions:
enableCache: true
componentRef:
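This spec also shows that a default never overrides an explicit argument: input1 declares defaultValue: default value, yet the task still pins constant: Hello because the pipeline passes it. A hypothetical Python equivalent, under the same @dsl.component assumption:

from kfp import dsl

@dsl.component
def component_op(input2: str, input1: str = 'default value'):
    print(f'input1: {input1}, input2: {input2}')

@dsl.pipeline(name='v2-component')  # pipeline name is hypothetical
def my_pipeline():
    # Explicit arguments compile to runtimeValue constants; the
    # defaultValue applies only when the caller omits input1.
    component_op(input1='Hello', input2='World')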

View File

@@ -4,26 +4,62 @@ sdkVersion: kfp-2.0.0-alpha.1
schemaVersion: 2.1.0
deploymentSpec:
executors:
exec-chicago-taxi-trips-dataset:
exec-xgboost-predict-2:
container:
image: byrnedo/alpine-curl@sha256:548379d0a4a0c08b9e55d9d87a592b7d35d9ab3037f4936f5ccd09d0b625a342
image: python:3.7
args:
- --data
- '{{$.inputs.artifacts[''data''].path}}'
- --model
- '{{$.inputs.artifacts[''model''].path}}'
- --label-column-name
- '{{$.inputs.parameters[''label_column_name'']}}'
- --predictions
- '{{$.outputs.artifacts[''predictions''].path}}'
command:
- sh
- -c
- "set -e -x -o pipefail\noutput_path=\"$0\"\nselect=\"$1\"\nwhere=\"$2\"\n\
limit=\"$3\"\nformat=\"$4\"\nmkdir -p \"$(dirname \"$output_path\")\"\n\
curl --get 'https://data.cityofchicago.org/resource/wrvz-psew.'\"${format}\"\
\ \\\n --data-urlencode '$limit='\"${limit}\" \\\n --data-urlencode\
\ '$where='\"${where}\" \\\n --data-urlencode '$select='\"${select}\"\
\ \\\n | tr -d '\"' > \"$output_path\" # Removing unneeded quotes around\
\ all numbers\n"
- '{{$.outputs.artifacts[''table''].path}}'
- '{{$.inputs.parameters[''select'']}}'
- '{{$.inputs.parameters[''where'']}}'
- '{{$.inputs.parameters[''limit'']}}'
- (PIP_DISABLE_PIP_VERSION_CHECK=1 python3 -m pip install --quiet --no-warn-script-location
'xgboost==1.1.1' 'pandas==1.0.5' 'pyarrow==0.17.1' || PIP_DISABLE_PIP_VERSION_CHECK=1
python3 -m pip install --quiet --no-warn-script-location 'xgboost==1.1.1'
'pandas==1.0.5' 'pyarrow==0.17.1' --user) && "$0" "$@"
- python3
- -u
- -c
- "def _make_parent_dirs_and_return_path(file_path: str):\n import os\n\
\ os.makedirs(os.path.dirname(file_path), exist_ok=True)\n return\
\ file_path\n\ndef xgboost_predict(\n data_path,\n model_path,\n \
\ predictions_path,\n label_column_name = None,\n):\n '''Make predictions\
\ using a trained XGBoost model.\n\n Args:\n data_path: Path for\
\ the feature data in Apache Parquet format.\n model_path: Path for\
\ the trained model in binary XGBoost format.\n predictions_path:\
\ Output path for the predictions.\n label_column_name: Optional.\
\ Name of the column containing the label data that is excluded during the\
\ prediction.\n\n Annotations:\n author: Alexey Volkov <alexey.volkov@ark-kun.com>\n\
\ '''\n from pathlib import Path\n\n import numpy\n import pandas\n\
\ import xgboost\n\n # Loading data\n df = pandas.read_parquet(data_path)\n\
\ if label_column_name:\n df = df.drop(columns=[label_column_name])\n\
\n evaluation_data = xgboost.DMatrix(\n data=df,\n )\n\n \
\ # Training\n model = xgboost.Booster(model_file=model_path)\n\n \
\ predictions = model.predict(evaluation_data)\n\n Path(predictions_path).parent.mkdir(parents=True,\
\ exist_ok=True)\n numpy.savetxt(predictions_path, predictions)\n\nimport\
\ argparse\n_parser = argparse.ArgumentParser(prog='Xgboost predict', description='Make\
\ predictions using a trained XGBoost model.\\n\\n Args:\\n data_path:\
\ Path for the feature data in Apache Parquet format.\\n model_path:\
\ Path for the trained model in binary XGBoost format.\\n predictions_path:\
\ Output path for the predictions.\\n label_column_name: Optional.\
\ Name of the column containing the label data that is excluded during the\
\ prediction.\\n\\n Annotations:\\n author: Alexey Volkov <alexey.volkov@ark-kun.com>')\n\
_parser.add_argument(\"--data\", dest=\"data_path\", type=str, required=True,\
\ default=argparse.SUPPRESS)\n_parser.add_argument(\"--model\", dest=\"\
model_path\", type=str, required=True, default=argparse.SUPPRESS)\n_parser.add_argument(\"\
--label-column-name\", dest=\"label_column_name\", type=str, required=False,\
\ default=argparse.SUPPRESS)\n_parser.add_argument(\"--predictions\", dest=\"\
predictions_path\", type=_make_parent_dirs_and_return_path, required=True,\
\ default=argparse.SUPPRESS)\n_parsed_args = vars(_parser.parse_args())\n\
\n_outputs = xgboost_predict(**_parsed_args)\n"
exec-xgboost-train:
container:
image: python:3.7
command:
- sh
- -c
@@ -129,9 +165,116 @@ deploymentSpec:
- '{{$.outputs.artifacts[''model''].path}}'
- --model-config
- '{{$.outputs.artifacts[''model_config''].path}}'
exec-xgboost-predict:
image: python:3.7
exec-xgboost-predict-3:
container:
image: python:3.7
command:
- sh
- -c
- (PIP_DISABLE_PIP_VERSION_CHECK=1 python3 -m pip install --quiet --no-warn-script-location
'xgboost==1.1.1' 'pandas==1.0.5' 'pyarrow==0.17.1' || PIP_DISABLE_PIP_VERSION_CHECK=1
python3 -m pip install --quiet --no-warn-script-location 'xgboost==1.1.1'
'pandas==1.0.5' 'pyarrow==0.17.1' --user) && "$0" "$@"
- python3
- -u
- -c
- "def _make_parent_dirs_and_return_path(file_path: str):\n import os\n\
\ os.makedirs(os.path.dirname(file_path), exist_ok=True)\n return\
\ file_path\n\ndef xgboost_predict(\n data_path,\n model_path,\n \
\ predictions_path,\n label_column_name = None,\n):\n '''Make predictions\
\ using a trained XGBoost model.\n\n Args:\n data_path: Path for\
\ the feature data in Apache Parquet format.\n model_path: Path for\
\ the trained model in binary XGBoost format.\n predictions_path:\
\ Output path for the predictions.\n label_column_name: Optional.\
\ Name of the column containing the label data that is excluded during the\
\ prediction.\n\n Annotations:\n author: Alexey Volkov <alexey.volkov@ark-kun.com>\n\
\ '''\n from pathlib import Path\n\n import numpy\n import pandas\n\
\ import xgboost\n\n # Loading data\n df = pandas.read_parquet(data_path)\n\
\ if label_column_name:\n df = df.drop(columns=[label_column_name])\n\
\n evaluation_data = xgboost.DMatrix(\n data=df,\n )\n\n \
\ # Training\n model = xgboost.Booster(model_file=model_path)\n\n \
\ predictions = model.predict(evaluation_data)\n\n Path(predictions_path).parent.mkdir(parents=True,\
\ exist_ok=True)\n numpy.savetxt(predictions_path, predictions)\n\nimport\
\ argparse\n_parser = argparse.ArgumentParser(prog='Xgboost predict', description='Make\
\ predictions using a trained XGBoost model.\\n\\n Args:\\n data_path:\
\ Path for the feature data in Apache Parquet format.\\n model_path:\
\ Path for the trained model in binary XGBoost format.\\n predictions_path:\
\ Output path for the predictions.\\n label_column_name: Optional.\
\ Name of the column containing the label data that is excluded during the\
\ prediction.\\n\\n Annotations:\\n author: Alexey Volkov <alexey.volkov@ark-kun.com>')\n\
_parser.add_argument(\"--data\", dest=\"data_path\", type=str, required=True,\
\ default=argparse.SUPPRESS)\n_parser.add_argument(\"--model\", dest=\"\
model_path\", type=str, required=True, default=argparse.SUPPRESS)\n_parser.add_argument(\"\
--label-column-name\", dest=\"label_column_name\", type=str, required=False,\
\ default=argparse.SUPPRESS)\n_parser.add_argument(\"--predictions\", dest=\"\
predictions_path\", type=_make_parent_dirs_and_return_path, required=True,\
\ default=argparse.SUPPRESS)\n_parsed_args = vars(_parser.parse_args())\n\
\n_outputs = xgboost_predict(**_parsed_args)\n"
args:
- --data
- '{{$.inputs.artifacts[''data''].path}}'
- --model
- '{{$.inputs.artifacts[''model''].path}}'
- --label-column-name
- '{{$.inputs.parameters[''label_column_name'']}}'
- --predictions
- '{{$.outputs.artifacts[''predictions''].path}}'
exec-chicago-taxi-trips-dataset:
container:
command:
- sh
- -c
- "set -e -x -o pipefail\noutput_path=\"$0\"\nselect=\"$1\"\nwhere=\"$2\"\n\
limit=\"$3\"\nformat=\"$4\"\nmkdir -p \"$(dirname \"$output_path\")\"\n\
curl --get 'https://data.cityofchicago.org/resource/wrvz-psew.'\"${format}\"\
\ \\\n --data-urlencode '$limit='\"${limit}\" \\\n --data-urlencode\
\ '$where='\"${where}\" \\\n --data-urlencode '$select='\"${select}\"\
\ \\\n | tr -d '\"' > \"$output_path\" # Removing unneeded quotes around\
\ all numbers\n"
- '{{$.outputs.artifacts[''table''].path}}'
- '{{$.inputs.parameters[''select'']}}'
- '{{$.inputs.parameters[''where'']}}'
- '{{$.inputs.parameters[''limit'']}}'
image: byrnedo/alpine-curl@sha256:548379d0a4a0c08b9e55d9d87a592b7d35d9ab3037f4936f5ccd09d0b625a342
exec-convert-csv-to-apache-parquet:
container:
image: python:3.7
args:
- --data
- '{{$.inputs.artifacts[''data''].path}}'
- --output-data
- '{{$.outputs.artifacts[''output_data''].path}}'
command:
- sh
- -c
- (PIP_DISABLE_PIP_VERSION_CHECK=1 python3 -m pip install --quiet --no-warn-script-location
'pyarrow==0.17.1' || PIP_DISABLE_PIP_VERSION_CHECK=1 python3 -m pip install
--quiet --no-warn-script-location 'pyarrow==0.17.1' --user) && "$0" "$@"
- python3
- -u
- -c
- "def _make_parent_dirs_and_return_path(file_path: str):\n import os\n\
\ os.makedirs(os.path.dirname(file_path), exist_ok=True)\n return\
\ file_path\n\ndef convert_csv_to_apache_parquet(\n data_path,\n output_data_path,\n\
):\n '''Converts CSV table to Apache Parquet.\n\n [Apache Parquet](https://parquet.apache.org/)\n\
\n Annotations:\n author: Alexey Volkov <alexey.volkov@ark-kun.com>\n\
\ '''\n from pyarrow import csv, parquet\n\n table = csv.read_csv(data_path)\n\
\ parquet.write_table(table, output_data_path)\n\nimport argparse\n_parser\
\ = argparse.ArgumentParser(prog='Convert csv to apache parquet', description='Converts\
\ CSV table to Apache Parquet.\\n\\n [Apache Parquet](https://parquet.apache.org/)\\\
n\\n Annotations:\\n author: Alexey Volkov <alexey.volkov@ark-kun.com>')\n\
_parser.add_argument(\"--data\", dest=\"data_path\", type=str, required=True,\
\ default=argparse.SUPPRESS)\n_parser.add_argument(\"--output-data\", dest=\"\
output_data_path\", type=_make_parent_dirs_and_return_path, required=True,\
\ default=argparse.SUPPRESS)\n_parsed_args = vars(_parser.parse_args())\n\
_output_files = _parsed_args.pop(\"_output_paths\", [])\n\n_outputs = convert_csv_to_apache_parquet(**_parsed_args)\n\
\n_output_serializers = [\n\n]\n\nimport os\nfor idx, output_file in enumerate(_output_files):\n\
\ try:\n os.makedirs(os.path.dirname(output_file))\n except\
\ OSError:\n pass\n with open(output_file, 'w') as f:\n \
\ f.write(_output_serializers[idx](_outputs[idx]))\n"
exec-xgboost-predict-4:
container:
command:
- sh
- -c
@@ -171,6 +314,7 @@ deploymentSpec:
_parser.add_argument(\"--predictions\", dest=\"predictions_path\", type=_make_parent_dirs_and_return_path,\
\ required=True, default=argparse.SUPPRESS)\n_parsed_args = vars(_parser.parse_args())\n\
\n_outputs = xgboost_predict(**_parsed_args)\n"
image: python:3.7
args:
- --data
- '{{$.inputs.artifacts[''data''].path}}'
@@ -180,45 +324,21 @@ deploymentSpec:
- '{{$.inputs.parameters[''label_column'']}}'
- --predictions
- '{{$.outputs.artifacts[''predictions''].path}}'
exec-convert-csv-to-apache-parquet:
container:
image: python:3.7
command:
- sh
- -c
- (PIP_DISABLE_PIP_VERSION_CHECK=1 python3 -m pip install --quiet --no-warn-script-location
'pyarrow==0.17.1' || PIP_DISABLE_PIP_VERSION_CHECK=1 python3 -m pip install
--quiet --no-warn-script-location 'pyarrow==0.17.1' --user) && "$0" "$@"
- python3
- -u
- -c
- "def _make_parent_dirs_and_return_path(file_path: str):\n import os\n\
\ os.makedirs(os.path.dirname(file_path), exist_ok=True)\n return\
\ file_path\n\ndef convert_csv_to_apache_parquet(\n data_path,\n output_data_path,\n\
):\n '''Converts CSV table to Apache Parquet.\n\n [Apache Parquet](https://parquet.apache.org/)\n\
\n Annotations:\n author: Alexey Volkov <alexey.volkov@ark-kun.com>\n\
\ '''\n from pyarrow import csv, parquet\n\n table = csv.read_csv(data_path)\n\
\ parquet.write_table(table, output_data_path)\n\nimport argparse\n_parser\
\ = argparse.ArgumentParser(prog='Convert csv to apache parquet', description='Converts\
\ CSV table to Apache Parquet.\\n\\n [Apache Parquet](https://parquet.apache.org/)\\\
n\\n Annotations:\\n author: Alexey Volkov <alexey.volkov@ark-kun.com>')\n\
_parser.add_argument(\"--data\", dest=\"data_path\", type=str, required=True,\
\ default=argparse.SUPPRESS)\n_parser.add_argument(\"--output-data\", dest=\"\
output_data_path\", type=_make_parent_dirs_and_return_path, required=True,\
\ default=argparse.SUPPRESS)\n_parsed_args = vars(_parser.parse_args())\n\
_output_files = _parsed_args.pop(\"_output_paths\", [])\n\n_outputs = convert_csv_to_apache_parquet(**_parsed_args)\n\
\n_output_serializers = [\n\n]\n\nimport os\nfor idx, output_file in enumerate(_output_files):\n\
\ try:\n os.makedirs(os.path.dirname(output_file))\n except\
\ OSError:\n pass\n with open(output_file, 'w') as f:\n \
\ f.write(_output_serializers[idx](_outputs[idx]))\n"
args:
- --data
- '{{$.inputs.artifacts[''data''].path}}'
- --output-data
- '{{$.outputs.artifacts[''output_data''].path}}'
exec-xgboost-train-2:
container:
image: python:3.7
args:
- --training-data
- '{{$.inputs.artifacts[''training_data''].path}}'
- --label-column-name
- '{{$.inputs.parameters[''label_column_name'']}}'
- --num-iterations
- '{{$.inputs.parameters[''num_iterations'']}}'
- --objective
- '{{$.inputs.parameters[''objective'']}}'
- --model
- '{{$.outputs.artifacts[''model''].path}}'
- --model-config
- '{{$.outputs.artifacts[''model_config''].path}}'
command:
- sh
- -c
@@ -311,130 +431,9 @@ deploymentSpec:
, dest=\"model_config_path\", type=_make_parent_dirs_and_return_path, required=True,\
\ default=argparse.SUPPRESS)\n_parsed_args = vars(_parser.parse_args())\n\
\n_outputs = xgboost_train(**_parsed_args)\n"
args:
- --training-data
- '{{$.inputs.artifacts[''training_data''].path}}'
- --label-column-name
- '{{$.inputs.parameters[''label_column_name'']}}'
- --num-iterations
- '{{$.inputs.parameters[''num_iterations'']}}'
- --objective
- '{{$.inputs.parameters[''objective'']}}'
- --model
- '{{$.outputs.artifacts[''model''].path}}'
- --model-config
- '{{$.outputs.artifacts[''model_config''].path}}'
exec-xgboost-predict-2:
container:
image: python:3.7
command:
- sh
- -c
- (PIP_DISABLE_PIP_VERSION_CHECK=1 python3 -m pip install --quiet --no-warn-script-location
'xgboost==1.1.1' 'pandas==1.0.5' 'pyarrow==0.17.1' || PIP_DISABLE_PIP_VERSION_CHECK=1
python3 -m pip install --quiet --no-warn-script-location 'xgboost==1.1.1'
'pandas==1.0.5' 'pyarrow==0.17.1' --user) && "$0" "$@"
- python3
- -u
- -c
- "def _make_parent_dirs_and_return_path(file_path: str):\n import os\n\
\ os.makedirs(os.path.dirname(file_path), exist_ok=True)\n return\
\ file_path\n\ndef xgboost_predict(\n data_path,\n model_path,\n \
\ predictions_path,\n label_column_name = None,\n):\n '''Make predictions\
\ using a trained XGBoost model.\n\n Args:\n data_path: Path for\
\ the feature data in Apache Parquet format.\n model_path: Path for\
\ the trained model in binary XGBoost format.\n predictions_path:\
\ Output path for the predictions.\n label_column_name: Optional.\
\ Name of the column containing the label data that is excluded during the\
\ prediction.\n\n Annotations:\n author: Alexey Volkov <alexey.volkov@ark-kun.com>\n\
\ '''\n from pathlib import Path\n\n import numpy\n import pandas\n\
\ import xgboost\n\n # Loading data\n df = pandas.read_parquet(data_path)\n\
\ if label_column_name:\n df = df.drop(columns=[label_column_name])\n\
\n evaluation_data = xgboost.DMatrix(\n data=df,\n )\n\n \
\ # Training\n model = xgboost.Booster(model_file=model_path)\n\n \
\ predictions = model.predict(evaluation_data)\n\n Path(predictions_path).parent.mkdir(parents=True,\
\ exist_ok=True)\n numpy.savetxt(predictions_path, predictions)\n\nimport\
\ argparse\n_parser = argparse.ArgumentParser(prog='Xgboost predict', description='Make\
\ predictions using a trained XGBoost model.\\n\\n Args:\\n data_path:\
\ Path for the feature data in Apache Parquet format.\\n model_path:\
\ Path for the trained model in binary XGBoost format.\\n predictions_path:\
\ Output path for the predictions.\\n label_column_name: Optional.\
\ Name of the column containing the label data that is excluded during the\
\ prediction.\\n\\n Annotations:\\n author: Alexey Volkov <alexey.volkov@ark-kun.com>')\n\
_parser.add_argument(\"--data\", dest=\"data_path\", type=str, required=True,\
\ default=argparse.SUPPRESS)\n_parser.add_argument(\"--model\", dest=\"\
model_path\", type=str, required=True, default=argparse.SUPPRESS)\n_parser.add_argument(\"\
--label-column-name\", dest=\"label_column_name\", type=str, required=False,\
\ default=argparse.SUPPRESS)\n_parser.add_argument(\"--predictions\", dest=\"\
predictions_path\", type=_make_parent_dirs_and_return_path, required=True,\
\ default=argparse.SUPPRESS)\n_parsed_args = vars(_parser.parse_args())\n\
\n_outputs = xgboost_predict(**_parsed_args)\n"
args:
- --data
- '{{$.inputs.artifacts[''data''].path}}'
- --model
- '{{$.inputs.artifacts[''model''].path}}'
- --label-column-name
- '{{$.inputs.parameters[''label_column_name'']}}'
- --predictions
- '{{$.outputs.artifacts[''predictions''].path}}'
exec-xgboost-predict-3:
exec-xgboost-predict:
container:
image: python:3.7
command:
- sh
- -c
- (PIP_DISABLE_PIP_VERSION_CHECK=1 python3 -m pip install --quiet --no-warn-script-location
'xgboost==1.1.1' 'pandas==1.0.5' 'pyarrow==0.17.1' || PIP_DISABLE_PIP_VERSION_CHECK=1
python3 -m pip install --quiet --no-warn-script-location 'xgboost==1.1.1'
'pandas==1.0.5' 'pyarrow==0.17.1' --user) && "$0" "$@"
- python3
- -u
- -c
- "def _make_parent_dirs_and_return_path(file_path: str):\n import os\n\
\ os.makedirs(os.path.dirname(file_path), exist_ok=True)\n return\
\ file_path\n\ndef xgboost_predict(\n data_path,\n model_path,\n \
\ predictions_path,\n label_column_name = None,\n):\n '''Make predictions\
\ using a trained XGBoost model.\n\n Args:\n data_path: Path for\
\ the feature data in Apache Parquet format.\n model_path: Path for\
\ the trained model in binary XGBoost format.\n predictions_path:\
\ Output path for the predictions.\n label_column_name: Optional.\
\ Name of the column containing the label data that is excluded during the\
\ prediction.\n\n Annotations:\n author: Alexey Volkov <alexey.volkov@ark-kun.com>\n\
\ '''\n from pathlib import Path\n\n import numpy\n import pandas\n\
\ import xgboost\n\n # Loading data\n df = pandas.read_parquet(data_path)\n\
\ if label_column_name:\n df = df.drop(columns=[label_column_name])\n\
\n evaluation_data = xgboost.DMatrix(\n data=df,\n )\n\n \
\ # Training\n model = xgboost.Booster(model_file=model_path)\n\n \
\ predictions = model.predict(evaluation_data)\n\n Path(predictions_path).parent.mkdir(parents=True,\
\ exist_ok=True)\n numpy.savetxt(predictions_path, predictions)\n\nimport\
\ argparse\n_parser = argparse.ArgumentParser(prog='Xgboost predict', description='Make\
\ predictions using a trained XGBoost model.\\n\\n Args:\\n data_path:\
\ Path for the feature data in Apache Parquet format.\\n model_path:\
\ Path for the trained model in binary XGBoost format.\\n predictions_path:\
\ Output path for the predictions.\\n label_column_name: Optional.\
\ Name of the column containing the label data that is excluded during the\
\ prediction.\\n\\n Annotations:\\n author: Alexey Volkov <alexey.volkov@ark-kun.com>')\n\
_parser.add_argument(\"--data\", dest=\"data_path\", type=str, required=True,\
\ default=argparse.SUPPRESS)\n_parser.add_argument(\"--model\", dest=\"\
model_path\", type=str, required=True, default=argparse.SUPPRESS)\n_parser.add_argument(\"\
--label-column-name\", dest=\"label_column_name\", type=str, required=False,\
\ default=argparse.SUPPRESS)\n_parser.add_argument(\"--predictions\", dest=\"\
predictions_path\", type=_make_parent_dirs_and_return_path, required=True,\
\ default=argparse.SUPPRESS)\n_parsed_args = vars(_parser.parse_args())\n\
\n_outputs = xgboost_predict(**_parsed_args)\n"
args:
- --data
- '{{$.inputs.artifacts[''data''].path}}'
- --model
- '{{$.inputs.artifacts[''model''].path}}'
- --label-column-name
- '{{$.inputs.parameters[''label_column_name'']}}'
- --predictions
- '{{$.outputs.artifacts[''predictions''].path}}'
exec-xgboost-predict-4:
container:
image: python:3.7
command:
- sh
- -c
@@ -474,6 +473,7 @@ deploymentSpec:
_parser.add_argument(\"--predictions\", dest=\"predictions_path\", type=_make_parent_dirs_and_return_path,\
\ required=True, default=argparse.SUPPRESS)\n_parsed_args = vars(_parser.parse_args())\n\
\n_outputs = xgboost_predict(**_parsed_args)\n"
image: python:3.7
args:
- --data
- '{{$.inputs.artifacts[''data''].path}}'
@@ -487,12 +487,18 @@ components:
comp-chicago-taxi-trips-dataset:
inputDefinitions:
parameters:
where:
parameterType: STRING
limit:
parameterType: NUMBER_INTEGER
select:
parameterType: STRING
defaultValue: trip_id,taxi_id,trip_start_timestamp,trip_end_timestamp,trip_seconds,trip_miles,pickup_census_tract,dropoff_census_tract,pickup_community_area,dropoff_community_area,fare,tips,tolls,extras,trip_total,payment_type,company,pickup_centroid_latitude,pickup_centroid_longitude,pickup_centroid_location,dropoff_centroid_latitude,dropoff_centroid_longitude,dropoff_centroid_location
limit:
parameterType: NUMBER_INTEGER
defaultValue: '1000'
format:
parameterType: STRING
defaultValue: csv
where:
parameterType: STRING
defaultValue: trip_start_timestamp>="1900-01-01" AND trip_start_timestamp<"2100-01-01"
outputDefinitions:
artifacts:
table:
@@ -500,52 +506,6 @@ components:
schemaTitle: system.Artifact
schemaVersion: 0.0.1
executorLabel: exec-chicago-taxi-trips-dataset
comp-xgboost-train:
inputDefinitions:
artifacts:
training_data:
artifactType:
schemaTitle: system.Artifact
schemaVersion: 0.0.1
parameters:
label_column:
parameterType: NUMBER_INTEGER
num_iterations:
parameterType: NUMBER_INTEGER
objective:
parameterType: STRING
outputDefinitions:
artifacts:
model:
artifactType:
schemaTitle: system.Artifact
schemaVersion: 0.0.1
model_config:
artifactType:
schemaTitle: system.Artifact
schemaVersion: 0.0.1
executorLabel: exec-xgboost-train
comp-xgboost-predict:
inputDefinitions:
artifacts:
data:
artifactType:
schemaTitle: system.Artifact
schemaVersion: 0.0.1
model:
artifactType:
schemaTitle: system.Artifact
schemaVersion: 0.0.1
parameters:
label_column:
parameterType: NUMBER_INTEGER
outputDefinitions:
artifacts:
predictions:
artifactType:
schemaTitle: system.Artifact
schemaVersion: 0.0.1
executorLabel: exec-xgboost-predict
comp-convert-csv-to-apache-parquet:
inputDefinitions:
artifacts:
@@ -560,7 +520,7 @@ components:
schemaTitle: system.Artifact
schemaVersion: 0.0.1
executorLabel: exec-convert-csv-to-apache-parquet
comp-xgboost-train-2:
comp-xgboost-train:
inputDefinitions:
artifacts:
training_data:
@@ -568,65 +528,38 @@ components:
schemaTitle: system.Artifact
schemaVersion: 0.0.1
parameters:
label_column_name:
booster:
parameterType: STRING
num_iterations:
defaultValue: gbtree
learning_rate:
parameterType: NUMBER_DOUBLE
defaultValue: '0.3'
max_depth:
parameterType: NUMBER_INTEGER
defaultValue: '6'
objective:
parameterType: STRING
defaultValue: reg:squarederror
num_iterations:
parameterType: NUMBER_INTEGER
defaultValue: '10'
label_column:
parameterType: NUMBER_INTEGER
defaultValue: '0'
min_split_loss:
parameterType: NUMBER_DOUBLE
defaultValue: '0'
outputDefinitions:
artifacts:
model:
artifactType:
schemaTitle: system.Artifact
schemaVersion: 0.0.1
model_config:
artifactType:
schemaTitle: system.Artifact
schemaVersion: 0.0.1
executorLabel: exec-xgboost-train-2
comp-xgboost-predict-2:
inputDefinitions:
artifacts:
data:
artifactType:
schemaTitle: system.Artifact
schemaVersion: 0.0.1
model:
artifactType:
schemaTitle: system.Artifact
schemaVersion: 0.0.1
parameters:
label_column_name:
parameterType: STRING
outputDefinitions:
artifacts:
predictions:
artifactType:
schemaTitle: system.Artifact
schemaVersion: 0.0.1
executorLabel: exec-xgboost-predict-2
comp-xgboost-predict-3:
inputDefinitions:
artifacts:
data:
artifactType:
schemaTitle: system.Artifact
schemaVersion: 0.0.1
model:
artifactType:
schemaTitle: system.Artifact
schemaVersion: 0.0.1
parameters:
label_column_name:
parameterType: STRING
outputDefinitions:
artifacts:
predictions:
artifactType:
schemaTitle: system.Artifact
schemaVersion: 0.0.1
executorLabel: exec-xgboost-predict-3
executorLabel: exec-xgboost-train
comp-xgboost-predict-4:
inputDefinitions:
artifacts:
@@ -648,53 +581,111 @@ components:
schemaTitle: system.Artifact
schemaVersion: 0.0.1
executorLabel: exec-xgboost-predict-4
comp-xgboost-predict-2:
inputDefinitions:
artifacts:
model:
artifactType:
schemaTitle: system.Artifact
schemaVersion: 0.0.1
data:
artifactType:
schemaTitle: system.Artifact
schemaVersion: 0.0.1
parameters:
label_column_name:
parameterType: STRING
outputDefinitions:
artifacts:
predictions:
artifactType:
schemaTitle: system.Artifact
schemaVersion: 0.0.1
executorLabel: exec-xgboost-predict-2
comp-xgboost-train-2:
inputDefinitions:
artifacts:
training_data:
artifactType:
schemaTitle: system.Artifact
schemaVersion: 0.0.1
parameters:
learning_rate:
parameterType: NUMBER_DOUBLE
defaultValue: '0.3'
objective:
parameterType: STRING
defaultValue: reg:squarederror
min_split_loss:
parameterType: NUMBER_DOUBLE
defaultValue: '0'
max_depth:
parameterType: NUMBER_INTEGER
defaultValue: '6'
num_iterations:
parameterType: NUMBER_INTEGER
defaultValue: '10'
label_column_name:
parameterType: STRING
booster:
parameterType: STRING
defaultValue: gbtree
outputDefinitions:
artifacts:
model:
artifactType:
schemaTitle: system.Artifact
schemaVersion: 0.0.1
model_config:
artifactType:
schemaTitle: system.Artifact
schemaVersion: 0.0.1
executorLabel: exec-xgboost-train-2
comp-xgboost-predict-3:
inputDefinitions:
artifacts:
model:
artifactType:
schemaTitle: system.Artifact
schemaVersion: 0.0.1
data:
artifactType:
schemaTitle: system.Artifact
schemaVersion: 0.0.1
parameters:
label_column_name:
parameterType: STRING
outputDefinitions:
artifacts:
predictions:
artifactType:
schemaTitle: system.Artifact
schemaVersion: 0.0.1
executorLabel: exec-xgboost-predict-3
comp-xgboost-predict:
inputDefinitions:
artifacts:
model:
artifactType:
schemaTitle: system.Artifact
schemaVersion: 0.0.1
data:
artifactType:
schemaTitle: system.Artifact
schemaVersion: 0.0.1
parameters:
label_column:
parameterType: NUMBER_INTEGER
outputDefinitions:
artifacts:
predictions:
artifactType:
schemaTitle: system.Artifact
schemaVersion: 0.0.1
executorLabel: exec-xgboost-predict
root:
dag:
tasks:
chicago-taxi-trips-dataset:
taskInfo:
name: chicago-taxi-trips-dataset
inputs:
parameters:
where:
runtimeValue:
constant: trip_start_timestamp >= "2019-01-01" AND trip_start_timestamp
< "2019-02-01"
select:
runtimeValue:
constant: tips,trip_seconds,trip_miles,pickup_community_area,dropoff_community_area,fare,tolls,extras,trip_total
limit:
runtimeValue:
constant: 10000.0
cachingOptions:
enableCache: true
componentRef:
name: comp-chicago-taxi-trips-dataset
xgboost-train:
taskInfo:
name: xgboost-train
inputs:
parameters:
label_column:
runtimeValue:
constant: 0.0
objective:
runtimeValue:
constant: reg:squarederror
num_iterations:
runtimeValue:
constant: 200.0
artifacts:
training_data:
taskOutputArtifact:
producerTask: chicago-taxi-trips-dataset
outputArtifactKey: table
dependentTasks:
- chicago-taxi-trips-dataset
cachingOptions:
enableCache: true
componentRef:
name: comp-xgboost-train
xgboost-predict:
taskInfo:
name: xgboost-predict
@@ -719,6 +710,55 @@ root:
enableCache: true
componentRef:
name: comp-xgboost-predict
xgboost-predict-3:
taskInfo:
name: xgboost-predict-3
inputs:
parameters:
label_column_name:
runtimeValue:
constant: tips
artifacts:
model:
taskOutputArtifact:
producerTask: xgboost-train
outputArtifactKey: model
data:
taskOutputArtifact:
producerTask: convert-csv-to-apache-parquet
outputArtifactKey: output_data
dependentTasks:
- convert-csv-to-apache-parquet
- xgboost-train
cachingOptions:
enableCache: true
componentRef:
name: comp-xgboost-predict-3
xgboost-train:
taskInfo:
name: xgboost-train
inputs:
parameters:
num_iterations:
runtimeValue:
constant: 200.0
label_column:
runtimeValue:
constant: 0.0
objective:
runtimeValue:
constant: reg:squarederror
artifacts:
training_data:
taskOutputArtifact:
producerTask: chicago-taxi-trips-dataset
outputArtifactKey: table
dependentTasks:
- chicago-taxi-trips-dataset
cachingOptions:
enableCache: true
componentRef:
name: comp-xgboost-train
convert-csv-to-apache-parquet:
taskInfo:
name: convert-csv-to-apache-parquet
@@ -734,79 +774,6 @@ root:
enableCache: true
componentRef:
name: comp-convert-csv-to-apache-parquet
xgboost-train-2:
taskInfo:
name: xgboost-train-2
inputs:
parameters:
label_column_name:
runtimeValue:
constant: tips
objective:
runtimeValue:
constant: reg:squarederror
num_iterations:
runtimeValue:
constant: 200.0
artifacts:
training_data:
taskOutputArtifact:
producerTask: convert-csv-to-apache-parquet
outputArtifactKey: output_data
dependentTasks:
- convert-csv-to-apache-parquet
cachingOptions:
enableCache: true
componentRef:
name: comp-xgboost-train-2
xgboost-predict-2:
taskInfo:
name: xgboost-predict-2
inputs:
parameters:
label_column_name:
runtimeValue:
constant: tips
artifacts:
data:
taskOutputArtifact:
producerTask: convert-csv-to-apache-parquet
outputArtifactKey: output_data
model:
taskOutputArtifact:
producerTask: xgboost-train-2
outputArtifactKey: model
dependentTasks:
- convert-csv-to-apache-parquet
- xgboost-train-2
cachingOptions:
enableCache: true
componentRef:
name: comp-xgboost-predict-2
xgboost-predict-3:
taskInfo:
name: xgboost-predict-3
inputs:
parameters:
label_column_name:
runtimeValue:
constant: tips
artifacts:
data:
taskOutputArtifact:
producerTask: convert-csv-to-apache-parquet
outputArtifactKey: output_data
model:
taskOutputArtifact:
producerTask: xgboost-train
outputArtifactKey: model
dependentTasks:
- convert-csv-to-apache-parquet
- xgboost-train
cachingOptions:
enableCache: true
componentRef:
name: comp-xgboost-predict-3
xgboost-predict-4:
taskInfo:
name: xgboost-predict-4
@@ -831,4 +798,72 @@ root:
enableCache: true
componentRef:
name: comp-xgboost-predict-4
chicago-taxi-trips-dataset:
taskInfo:
name: chicago-taxi-trips-dataset
inputs:
parameters:
limit:
runtimeValue:
constant: 10000.0
where:
runtimeValue:
constant: trip_start_timestamp >= "2019-01-01" AND trip_start_timestamp
< "2019-02-01"
select:
runtimeValue:
constant: tips,trip_seconds,trip_miles,pickup_community_area,dropoff_community_area,fare,tolls,extras,trip_total
cachingOptions:
enableCache: true
componentRef:
name: comp-chicago-taxi-trips-dataset
xgboost-train-2:
taskInfo:
name: xgboost-train-2
inputs:
parameters:
objective:
runtimeValue:
constant: reg:squarederror
num_iterations:
runtimeValue:
constant: 200.0
label_column_name:
runtimeValue:
constant: tips
artifacts:
training_data:
taskOutputArtifact:
producerTask: convert-csv-to-apache-parquet
outputArtifactKey: output_data
dependentTasks:
- convert-csv-to-apache-parquet
cachingOptions:
enableCache: true
componentRef:
name: comp-xgboost-train-2
xgboost-predict-2:
taskInfo:
name: xgboost-predict-2
inputs:
parameters:
label_column_name:
runtimeValue:
constant: tips
artifacts:
model:
taskOutputArtifact:
producerTask: xgboost-train-2
outputArtifactKey: model
data:
taskOutputArtifact:
producerTask: convert-csv-to-apache-parquet
outputArtifactKey: output_data
dependentTasks:
- convert-csv-to-apache-parquet
- xgboost-train-2
cachingOptions:
enableCache: true
componentRef:
name: comp-xgboost-predict-2
defaultPipelineRoot: dummy_root
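One quirk worth noting in this spec: defaults that came from v1 component YAML (defaultValue: '1000', defaultValue: '0') render as quoted strings, while Python-derived defaults elsewhere render as bare numbers (100.0). That is consistent with the _to_protobuf_value sketch earlier: v1 YAML defaults arrive as strings and pass through as string_value. A small demonstration, assuming only google.protobuf:

from google.protobuf import json_format, struct_pb2

# A string default stays a string and renders quoted in YAML/JSON...
print(repr(json_format.MessageToDict(struct_pb2.Value(string_value='1000'))))  # '1000'
# ...while a numeric default becomes number_value and renders as a bare double.
print(repr(json_format.MessageToDict(struct_pb2.Value(number_value=100))))  # 100.0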