chore: fix CI by adding a dependency of python/sdk for the e2e-test (#11221)
Signed-off-by: Ricardo M. Oliveira <rmartine@redhat.com>
This commit is contained in:
parent
581b7e5b7e
commit
ba006bddcb
|
@ -0,0 +1,95 @@
|
|||
# Copyright 2021 The Kubeflow Authors
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
from kfp import compiler
|
||||
from kfp import components
|
||||
from kfp import dsl
|
||||
|
||||
# Reusable KFP components are loaded from commit-pinned URLs on GitHub so the
# sample stays reproducible; each URL points at a component.yaml definition.
# NOTE(review): load_component_from_url fetches over the network at import
# time — importing this module requires internet access.
chicago_taxi_dataset_op = components.load_component_from_url(
    'https://raw.githubusercontent.com/kubeflow/pipelines/60a2612541ec08c6a85c237d2ec7525b12543a43/components/datasets/Chicago_Taxi_Trips/component.yaml'
)
# Converts a CSV table artifact into Apache Parquet format.
convert_csv_to_apache_parquet_op = components.load_component_from_url(
    'https://raw.githubusercontent.com/kubeflow/pipelines/0d7d6f41c92bdc05c2825232afe2b47e5cb6c4b3/components/_converters/ApacheParquet/from_CSV/component.yaml'
)
# XGBoost training and prediction on CSV-format data.
xgboost_train_on_csv_op = components.load_component_from_url(
    'https://raw.githubusercontent.com/kubeflow/pipelines/567c04c51ff00a1ee525b3458425b17adbe3df61/components/XGBoost/Train/component.yaml'
)
xgboost_predict_on_csv_op = components.load_component_from_url(
    'https://raw.githubusercontent.com/kubeflow/pipelines/31939086d66d633732f75300ce69eb60e9fb0269/components/XGBoost/Predict/component.yaml'
)
# XGBoost training and prediction on Apache Parquet-format data.
xgboost_train_on_parquet_op = components.load_component_from_url(
    'https://raw.githubusercontent.com/kubeflow/pipelines/0ae2f30ff24beeef1c64cc7c434f1f652c065192/components/XGBoost/Train/from_ApacheParquet/component.yaml'
)
xgboost_predict_on_parquet_op = components.load_component_from_url(
    'https://raw.githubusercontent.com/kubeflow/pipelines/31939086d66d633732f75300ce69eb60e9fb0269/components/XGBoost/Predict/from_ApacheParquet/component.yaml'
)
|
||||
|
||||
|
||||
@dsl.pipeline(name='xgboost-sample-pipeline')
def xgboost_pipeline():
    """Train and evaluate XGBoost models on CSV and Apache Parquet data.

    The same Chicago Taxi slice is processed in both formats; each trained
    model is additionally evaluated against the other format's data to
    exercise cross-format prediction.
    """
    # Fetch one month of Chicago Taxi trips as a CSV table.
    csv_table = chicago_taxi_dataset_op(
        where='trip_start_timestamp >= "2019-01-01" AND trip_start_timestamp < "2019-02-01"',
        select='tips,trip_seconds,trip_miles,pickup_community_area,dropoff_community_area,fare,tolls,extras,trip_total',
        limit=10000,
    ).output

    # CSV path: train on the table, then predict on the same table.
    csv_model = xgboost_train_on_csv_op(
        training_data=csv_table,
        label_column=0,
        objective='reg:squarederror',
        num_iterations=200,
    ).outputs['model']

    xgboost_predict_on_csv_op(
        data=csv_table,
        model=csv_model,
        label_column=0,
    )

    # Parquet path: convert the CSV table, train, then predict.
    parquet_table = convert_csv_to_apache_parquet_op(
        data=csv_table).output

    parquet_model = xgboost_train_on_parquet_op(
        training_data=parquet_table,
        label_column_name='tips',
        objective='reg:squarederror',
        num_iterations=200,
    ).outputs['model']

    xgboost_predict_on_parquet_op(
        data=parquet_table,
        model=parquet_model,
        label_column_name='tips',
    )

    # Cross-format checks: each model predicts on the other format's data.
    xgboost_predict_on_parquet_op(
        data=parquet_table,
        model=csv_model,
        label_column_name='tips',
    )

    xgboost_predict_on_csv_op(
        data=csv_table,
        model=parquet_model,
        label_column=0,
    )
|
||||
|
||||
|
||||
if __name__ == '__main__':
    # Compile the pipeline definition next to this source file.
    # Path.with_suffix is used instead of __file__.replace('.py', '.yaml')
    # because str.replace rewrites the *first* '.py' occurring anywhere in
    # the path (e.g. a directory named 'my.python/'), not just the extension.
    from pathlib import Path

    compiler.Compiler().compile(
        pipeline_func=xgboost_pipeline,
        package_path=str(Path(__file__).with_suffix('.yaml')))
|
|
@ -0,0 +1,926 @@
|
|||
# PIPELINE DEFINITION
|
||||
# Name: xgboost-sample-pipeline
|
||||
components:
|
||||
comp-chicago-taxi-trips-dataset:
|
||||
executorLabel: exec-chicago-taxi-trips-dataset
|
||||
inputDefinitions:
|
||||
parameters:
|
||||
format:
|
||||
defaultValue: csv
|
||||
isOptional: true
|
||||
parameterType: STRING
|
||||
limit:
|
||||
defaultValue: 1000.0
|
||||
isOptional: true
|
||||
parameterType: NUMBER_INTEGER
|
||||
select:
|
||||
defaultValue: trip_id,taxi_id,trip_start_timestamp,trip_end_timestamp,trip_seconds,trip_miles,pickup_census_tract,dropoff_census_tract,pickup_community_area,dropoff_community_area,fare,tips,tolls,extras,trip_total,payment_type,company,pickup_centroid_latitude,pickup_centroid_longitude,pickup_centroid_location,dropoff_centroid_latitude,dropoff_centroid_longitude,dropoff_centroid_location
|
||||
isOptional: true
|
||||
parameterType: STRING
|
||||
where:
|
||||
defaultValue: trip_start_timestamp>="1900-01-01" AND trip_start_timestamp<"2100-01-01"
|
||||
isOptional: true
|
||||
parameterType: STRING
|
||||
outputDefinitions:
|
||||
artifacts:
|
||||
table:
|
||||
artifactType:
|
||||
schemaTitle: system.Artifact
|
||||
schemaVersion: 0.0.1
|
||||
comp-convert-csv-to-apache-parquet:
|
||||
executorLabel: exec-convert-csv-to-apache-parquet
|
||||
inputDefinitions:
|
||||
artifacts:
|
||||
data:
|
||||
artifactType:
|
||||
schemaTitle: system.Artifact
|
||||
schemaVersion: 0.0.1
|
||||
outputDefinitions:
|
||||
artifacts:
|
||||
output_data:
|
||||
artifactType:
|
||||
schemaTitle: system.Artifact
|
||||
schemaVersion: 0.0.1
|
||||
comp-xgboost-predict:
|
||||
executorLabel: exec-xgboost-predict
|
||||
inputDefinitions:
|
||||
artifacts:
|
||||
data:
|
||||
artifactType:
|
||||
schemaTitle: system.Artifact
|
||||
schemaVersion: 0.0.1
|
||||
model:
|
||||
artifactType:
|
||||
schemaTitle: system.Artifact
|
||||
schemaVersion: 0.0.1
|
||||
parameters:
|
||||
label_column:
|
||||
isOptional: true
|
||||
parameterType: NUMBER_INTEGER
|
||||
outputDefinitions:
|
||||
artifacts:
|
||||
predictions:
|
||||
artifactType:
|
||||
schemaTitle: system.Artifact
|
||||
schemaVersion: 0.0.1
|
||||
comp-xgboost-predict-2:
|
||||
executorLabel: exec-xgboost-predict-2
|
||||
inputDefinitions:
|
||||
artifacts:
|
||||
data:
|
||||
artifactType:
|
||||
schemaTitle: system.Artifact
|
||||
schemaVersion: 0.0.1
|
||||
model:
|
||||
artifactType:
|
||||
schemaTitle: system.Artifact
|
||||
schemaVersion: 0.0.1
|
||||
parameters:
|
||||
label_column_name:
|
||||
isOptional: true
|
||||
parameterType: STRING
|
||||
outputDefinitions:
|
||||
artifacts:
|
||||
predictions:
|
||||
artifactType:
|
||||
schemaTitle: system.Artifact
|
||||
schemaVersion: 0.0.1
|
||||
comp-xgboost-predict-3:
|
||||
executorLabel: exec-xgboost-predict-3
|
||||
inputDefinitions:
|
||||
artifacts:
|
||||
data:
|
||||
artifactType:
|
||||
schemaTitle: system.Artifact
|
||||
schemaVersion: 0.0.1
|
||||
model:
|
||||
artifactType:
|
||||
schemaTitle: system.Artifact
|
||||
schemaVersion: 0.0.1
|
||||
parameters:
|
||||
label_column_name:
|
||||
isOptional: true
|
||||
parameterType: STRING
|
||||
outputDefinitions:
|
||||
artifacts:
|
||||
predictions:
|
||||
artifactType:
|
||||
schemaTitle: system.Artifact
|
||||
schemaVersion: 0.0.1
|
||||
comp-xgboost-predict-4:
|
||||
executorLabel: exec-xgboost-predict-4
|
||||
inputDefinitions:
|
||||
artifacts:
|
||||
data:
|
||||
artifactType:
|
||||
schemaTitle: system.Artifact
|
||||
schemaVersion: 0.0.1
|
||||
model:
|
||||
artifactType:
|
||||
schemaTitle: system.Artifact
|
||||
schemaVersion: 0.0.1
|
||||
parameters:
|
||||
label_column:
|
||||
isOptional: true
|
||||
parameterType: NUMBER_INTEGER
|
||||
outputDefinitions:
|
||||
artifacts:
|
||||
predictions:
|
||||
artifactType:
|
||||
schemaTitle: system.Artifact
|
||||
schemaVersion: 0.0.1
|
||||
comp-xgboost-train:
|
||||
executorLabel: exec-xgboost-train
|
||||
inputDefinitions:
|
||||
artifacts:
|
||||
starting_model:
|
||||
artifactType:
|
||||
schemaTitle: system.Artifact
|
||||
schemaVersion: 0.0.1
|
||||
isOptional: true
|
||||
training_data:
|
||||
artifactType:
|
||||
schemaTitle: system.Artifact
|
||||
schemaVersion: 0.0.1
|
||||
parameters:
|
||||
booster:
|
||||
defaultValue: gbtree
|
||||
isOptional: true
|
||||
parameterType: STRING
|
||||
booster_params:
|
||||
isOptional: true
|
||||
parameterType: STRUCT
|
||||
label_column:
|
||||
defaultValue: 0.0
|
||||
isOptional: true
|
||||
parameterType: NUMBER_INTEGER
|
||||
learning_rate:
|
||||
defaultValue: 0.3
|
||||
isOptional: true
|
||||
parameterType: NUMBER_DOUBLE
|
||||
max_depth:
|
||||
defaultValue: 6.0
|
||||
isOptional: true
|
||||
parameterType: NUMBER_INTEGER
|
||||
min_split_loss:
|
||||
defaultValue: 0.0
|
||||
isOptional: true
|
||||
parameterType: NUMBER_DOUBLE
|
||||
num_iterations:
|
||||
defaultValue: 10.0
|
||||
isOptional: true
|
||||
parameterType: NUMBER_INTEGER
|
||||
objective:
|
||||
defaultValue: reg:squarederror
|
||||
isOptional: true
|
||||
parameterType: STRING
|
||||
outputDefinitions:
|
||||
artifacts:
|
||||
model:
|
||||
artifactType:
|
||||
schemaTitle: system.Artifact
|
||||
schemaVersion: 0.0.1
|
||||
model_config:
|
||||
artifactType:
|
||||
schemaTitle: system.Artifact
|
||||
schemaVersion: 0.0.1
|
||||
comp-xgboost-train-2:
|
||||
executorLabel: exec-xgboost-train-2
|
||||
inputDefinitions:
|
||||
artifacts:
|
||||
starting_model:
|
||||
artifactType:
|
||||
schemaTitle: system.Artifact
|
||||
schemaVersion: 0.0.1
|
||||
isOptional: true
|
||||
training_data:
|
||||
artifactType:
|
||||
schemaTitle: system.Artifact
|
||||
schemaVersion: 0.0.1
|
||||
parameters:
|
||||
booster:
|
||||
defaultValue: gbtree
|
||||
isOptional: true
|
||||
parameterType: STRING
|
||||
booster_params:
|
||||
isOptional: true
|
||||
parameterType: STRUCT
|
||||
label_column_name:
|
||||
parameterType: STRING
|
||||
learning_rate:
|
||||
defaultValue: 0.3
|
||||
isOptional: true
|
||||
parameterType: NUMBER_DOUBLE
|
||||
max_depth:
|
||||
defaultValue: 6.0
|
||||
isOptional: true
|
||||
parameterType: NUMBER_INTEGER
|
||||
min_split_loss:
|
||||
defaultValue: 0.0
|
||||
isOptional: true
|
||||
parameterType: NUMBER_DOUBLE
|
||||
num_iterations:
|
||||
defaultValue: 10.0
|
||||
isOptional: true
|
||||
parameterType: NUMBER_INTEGER
|
||||
objective:
|
||||
defaultValue: reg:squarederror
|
||||
isOptional: true
|
||||
parameterType: STRING
|
||||
outputDefinitions:
|
||||
artifacts:
|
||||
model:
|
||||
artifactType:
|
||||
schemaTitle: system.Artifact
|
||||
schemaVersion: 0.0.1
|
||||
model_config:
|
||||
artifactType:
|
||||
schemaTitle: system.Artifact
|
||||
schemaVersion: 0.0.1
|
||||
deploymentSpec:
|
||||
executors:
|
||||
exec-chicago-taxi-trips-dataset:
|
||||
container:
|
||||
command:
|
||||
- sh
|
||||
- -c
|
||||
- "set -e -x -o pipefail\noutput_path=\"$0\"\nselect=\"$1\"\nwhere=\"$2\"\n\
|
||||
limit=\"$3\"\nformat=\"$4\"\nmkdir -p \"$(dirname \"$output_path\")\"\n\
|
||||
curl --get 'https://data.cityofchicago.org/resource/wrvz-psew.'\"${format}\"\
|
||||
\ \\\n --data-urlencode '$limit='\"${limit}\" \\\n --data-urlencode\
|
||||
\ '$where='\"${where}\" \\\n --data-urlencode '$select='\"${select}\"\
|
||||
\ \\\n | tr -d '\"' > \"$output_path\" # Removing unneeded quotes around\
|
||||
\ all numbers\n"
|
||||
- '{{$.outputs.artifacts[''table''].path}}'
|
||||
- '{{$.inputs.parameters[''select'']}}'
|
||||
- '{{$.inputs.parameters[''where'']}}'
|
||||
- '{{$.inputs.parameters[''limit'']}}'
|
||||
- '{{$.inputs.parameters[''format'']}}'
|
||||
image: byrnedo/alpine-curl@sha256:548379d0a4a0c08b9e55d9d87a592b7d35d9ab3037f4936f5ccd09d0b625a342
|
||||
exec-convert-csv-to-apache-parquet:
|
||||
container:
|
||||
args:
|
||||
- --data
|
||||
- '{{$.inputs.artifacts[''data''].path}}'
|
||||
- --output-data
|
||||
- '{{$.outputs.artifacts[''output_data''].path}}'
|
||||
command:
|
||||
- sh
|
||||
- -c
|
||||
- (PIP_DISABLE_PIP_VERSION_CHECK=1 python3 -m pip install --quiet --no-warn-script-location
|
||||
'pyarrow==0.17.1' || PIP_DISABLE_PIP_VERSION_CHECK=1 python3 -m pip install
|
||||
--quiet --no-warn-script-location 'pyarrow==0.17.1' --user) && "$0" "$@"
|
||||
- python3
|
||||
- -u
|
||||
- -c
|
||||
- "def _make_parent_dirs_and_return_path(file_path: str):\n import os\n\
|
||||
\ os.makedirs(os.path.dirname(file_path), exist_ok=True)\n return\
|
||||
\ file_path\n\ndef convert_csv_to_apache_parquet(\n data_path,\n output_data_path,\n\
|
||||
):\n '''Converts CSV table to Apache Parquet.\n\n [Apache Parquet](https://parquet.apache.org/)\n\
|
||||
\n Annotations:\n author: Alexey Volkov <alexey.volkov@ark-kun.com>\n\
|
||||
\ '''\n from pyarrow import csv, parquet\n\n table = csv.read_csv(data_path)\n\
|
||||
\ parquet.write_table(table, output_data_path)\n\nimport argparse\n_parser\
|
||||
\ = argparse.ArgumentParser(prog='Convert csv to apache parquet', description='Converts\
|
||||
\ CSV table to Apache Parquet.\\n\\n [Apache Parquet](https://parquet.apache.org/)\\\
|
||||
n\\n Annotations:\\n author: Alexey Volkov <alexey.volkov@ark-kun.com>')\n\
|
||||
_parser.add_argument(\"--data\", dest=\"data_path\", type=str, required=True,\
|
||||
\ default=argparse.SUPPRESS)\n_parser.add_argument(\"--output-data\", dest=\"\
|
||||
output_data_path\", type=_make_parent_dirs_and_return_path, required=True,\
|
||||
\ default=argparse.SUPPRESS)\n_parsed_args = vars(_parser.parse_args())\n\
|
||||
_output_files = _parsed_args.pop(\"_output_paths\", [])\n\n_outputs = convert_csv_to_apache_parquet(**_parsed_args)\n\
|
||||
\n_output_serializers = [\n\n]\n\nimport os\nfor idx, output_file in enumerate(_output_files):\n\
|
||||
\ try:\n os.makedirs(os.path.dirname(output_file))\n except\
|
||||
\ OSError:\n pass\n with open(output_file, 'w') as f:\n \
|
||||
\ f.write(_output_serializers[idx](_outputs[idx]))\n"
|
||||
image: python:3.7
|
||||
exec-xgboost-predict:
|
||||
container:
|
||||
args:
|
||||
- --data
|
||||
- '{{$.inputs.artifacts[''data''].path}}'
|
||||
- --model
|
||||
- '{{$.inputs.artifacts[''model''].path}}'
|
||||
- '{"IfPresent": {"InputName": "label_column", "Then": ["--label-column",
|
||||
"{{$.inputs.parameters[''label_column'']}}"]}}'
|
||||
- --predictions
|
||||
- '{{$.outputs.artifacts[''predictions''].path}}'
|
||||
command:
|
||||
- sh
|
||||
- -c
|
||||
- (PIP_DISABLE_PIP_VERSION_CHECK=1 python3 -m pip install --quiet --no-warn-script-location
|
||||
'xgboost==1.1.1' 'pandas==1.0.5' || PIP_DISABLE_PIP_VERSION_CHECK=1 python3
|
||||
-m pip install --quiet --no-warn-script-location 'xgboost==1.1.1' 'pandas==1.0.5'
|
||||
--user) && "$0" "$@"
|
||||
- python3
|
||||
- -u
|
||||
- -c
|
||||
- "def _make_parent_dirs_and_return_path(file_path: str):\n import os\n\
|
||||
\ os.makedirs(os.path.dirname(file_path), exist_ok=True)\n return\
|
||||
\ file_path\n\ndef xgboost_predict(\n data_path, # Also supports LibSVM\n\
|
||||
\ model_path,\n predictions_path,\n label_column = None,\n):\n\
|
||||
\ '''Make predictions using a trained XGBoost model.\n\n Args:\n \
|
||||
\ data_path: Path for the feature data in CSV format.\n model_path:\
|
||||
\ Path for the trained model in binary XGBoost format.\n predictions_path:\
|
||||
\ Output path for the predictions.\n label_column: Column containing\
|
||||
\ the label data.\n\n Annotations:\n author: Alexey Volkov <alexey.volkov@ark-kun.com>\n\
|
||||
\ '''\n from pathlib import Path\n\n import numpy\n import pandas\n\
|
||||
\ import xgboost\n\n df = pandas.read_csv(\n data_path,\n \
|
||||
\ )\n\n if label_column is not None:\n df = df.drop(columns=[df.columns[label_column]])\n\
|
||||
\n testing_data = xgboost.DMatrix(\n data=df,\n )\n\n model\
|
||||
\ = xgboost.Booster(model_file=model_path)\n\n predictions = model.predict(testing_data)\n\
|
||||
\n Path(predictions_path).parent.mkdir(parents=True, exist_ok=True)\n\
|
||||
\ numpy.savetxt(predictions_path, predictions)\n\nimport argparse\n_parser\
|
||||
\ = argparse.ArgumentParser(prog='Xgboost predict', description='Make predictions\
|
||||
\ using a trained XGBoost model.\\n\\n Args:\\n data_path: Path\
|
||||
\ for the feature data in CSV format.\\n model_path: Path for the\
|
||||
\ trained model in binary XGBoost format.\\n predictions_path: Output\
|
||||
\ path for the predictions.\\n label_column: Column containing the\
|
||||
\ label data.\\n\\n Annotations:\\n author: Alexey Volkov <alexey.volkov@ark-kun.com>')\n\
|
||||
_parser.add_argument(\"--data\", dest=\"data_path\", type=str, required=True,\
|
||||
\ default=argparse.SUPPRESS)\n_parser.add_argument(\"--model\", dest=\"\
|
||||
model_path\", type=str, required=True, default=argparse.SUPPRESS)\n_parser.add_argument(\"\
|
||||
--label-column\", dest=\"label_column\", type=int, required=False, default=argparse.SUPPRESS)\n\
|
||||
_parser.add_argument(\"--predictions\", dest=\"predictions_path\", type=_make_parent_dirs_and_return_path,\
|
||||
\ required=True, default=argparse.SUPPRESS)\n_parsed_args = vars(_parser.parse_args())\n\
|
||||
\n_outputs = xgboost_predict(**_parsed_args)\n"
|
||||
image: python:3.7
|
||||
exec-xgboost-predict-2:
|
||||
container:
|
||||
args:
|
||||
- --data
|
||||
- '{{$.inputs.artifacts[''data''].path}}'
|
||||
- --model
|
||||
- '{{$.inputs.artifacts[''model''].path}}'
|
||||
- '{"IfPresent": {"InputName": "label_column_name", "Then": ["--label-column-name",
|
||||
"{{$.inputs.parameters[''label_column_name'']}}"]}}'
|
||||
- --predictions
|
||||
- '{{$.outputs.artifacts[''predictions''].path}}'
|
||||
command:
|
||||
- sh
|
||||
- -c
|
||||
- (PIP_DISABLE_PIP_VERSION_CHECK=1 python3 -m pip install --quiet --no-warn-script-location
|
||||
'xgboost==1.1.1' 'pandas==1.0.5' 'pyarrow==0.17.1' || PIP_DISABLE_PIP_VERSION_CHECK=1
|
||||
python3 -m pip install --quiet --no-warn-script-location 'xgboost==1.1.1'
|
||||
'pandas==1.0.5' 'pyarrow==0.17.1' --user) && "$0" "$@"
|
||||
- python3
|
||||
- -u
|
||||
- -c
|
||||
- "def _make_parent_dirs_and_return_path(file_path: str):\n import os\n\
|
||||
\ os.makedirs(os.path.dirname(file_path), exist_ok=True)\n return\
|
||||
\ file_path\n\ndef xgboost_predict(\n data_path,\n model_path,\n \
|
||||
\ predictions_path,\n label_column_name = None,\n):\n '''Make predictions\
|
||||
\ using a trained XGBoost model.\n\n Args:\n data_path: Path for\
|
||||
\ the feature data in Apache Parquet format.\n model_path: Path for\
|
||||
\ the trained model in binary XGBoost format.\n predictions_path:\
|
||||
\ Output path for the predictions.\n label_column_name: Optional.\
|
||||
\ Name of the column containing the label data that is excluded during the\
|
||||
\ prediction.\n\n Annotations:\n author: Alexey Volkov <alexey.volkov@ark-kun.com>\n\
|
||||
\ '''\n from pathlib import Path\n\n import numpy\n import pandas\n\
|
||||
\ import xgboost\n\n # Loading data\n df = pandas.read_parquet(data_path)\n\
|
||||
\ if label_column_name:\n df = df.drop(columns=[label_column_name])\n\
|
||||
\n evaluation_data = xgboost.DMatrix(\n data=df,\n )\n\n \
|
||||
\ # Training\n model = xgboost.Booster(model_file=model_path)\n\n \
|
||||
\ predictions = model.predict(evaluation_data)\n\n Path(predictions_path).parent.mkdir(parents=True,\
|
||||
\ exist_ok=True)\n numpy.savetxt(predictions_path, predictions)\n\nimport\
|
||||
\ argparse\n_parser = argparse.ArgumentParser(prog='Xgboost predict', description='Make\
|
||||
\ predictions using a trained XGBoost model.\\n\\n Args:\\n data_path:\
|
||||
\ Path for the feature data in Apache Parquet format.\\n model_path:\
|
||||
\ Path for the trained model in binary XGBoost format.\\n predictions_path:\
|
||||
\ Output path for the predictions.\\n label_column_name: Optional.\
|
||||
\ Name of the column containing the label data that is excluded during the\
|
||||
\ prediction.\\n\\n Annotations:\\n author: Alexey Volkov <alexey.volkov@ark-kun.com>')\n\
|
||||
_parser.add_argument(\"--data\", dest=\"data_path\", type=str, required=True,\
|
||||
\ default=argparse.SUPPRESS)\n_parser.add_argument(\"--model\", dest=\"\
|
||||
model_path\", type=str, required=True, default=argparse.SUPPRESS)\n_parser.add_argument(\"\
|
||||
--label-column-name\", dest=\"label_column_name\", type=str, required=False,\
|
||||
\ default=argparse.SUPPRESS)\n_parser.add_argument(\"--predictions\", dest=\"\
|
||||
predictions_path\", type=_make_parent_dirs_and_return_path, required=True,\
|
||||
\ default=argparse.SUPPRESS)\n_parsed_args = vars(_parser.parse_args())\n\
|
||||
\n_outputs = xgboost_predict(**_parsed_args)\n"
|
||||
image: python:3.7
|
||||
exec-xgboost-predict-3:
|
||||
container:
|
||||
args:
|
||||
- --data
|
||||
- '{{$.inputs.artifacts[''data''].path}}'
|
||||
- --model
|
||||
- '{{$.inputs.artifacts[''model''].path}}'
|
||||
- '{"IfPresent": {"InputName": "label_column_name", "Then": ["--label-column-name",
|
||||
"{{$.inputs.parameters[''label_column_name'']}}"]}}'
|
||||
- --predictions
|
||||
- '{{$.outputs.artifacts[''predictions''].path}}'
|
||||
command:
|
||||
- sh
|
||||
- -c
|
||||
- (PIP_DISABLE_PIP_VERSION_CHECK=1 python3 -m pip install --quiet --no-warn-script-location
|
||||
'xgboost==1.1.1' 'pandas==1.0.5' 'pyarrow==0.17.1' || PIP_DISABLE_PIP_VERSION_CHECK=1
|
||||
python3 -m pip install --quiet --no-warn-script-location 'xgboost==1.1.1'
|
||||
'pandas==1.0.5' 'pyarrow==0.17.1' --user) && "$0" "$@"
|
||||
- python3
|
||||
- -u
|
||||
- -c
|
||||
- "def _make_parent_dirs_and_return_path(file_path: str):\n import os\n\
|
||||
\ os.makedirs(os.path.dirname(file_path), exist_ok=True)\n return\
|
||||
\ file_path\n\ndef xgboost_predict(\n data_path,\n model_path,\n \
|
||||
\ predictions_path,\n label_column_name = None,\n):\n '''Make predictions\
|
||||
\ using a trained XGBoost model.\n\n Args:\n data_path: Path for\
|
||||
\ the feature data in Apache Parquet format.\n model_path: Path for\
|
||||
\ the trained model in binary XGBoost format.\n predictions_path:\
|
||||
\ Output path for the predictions.\n label_column_name: Optional.\
|
||||
\ Name of the column containing the label data that is excluded during the\
|
||||
\ prediction.\n\n Annotations:\n author: Alexey Volkov <alexey.volkov@ark-kun.com>\n\
|
||||
\ '''\n from pathlib import Path\n\n import numpy\n import pandas\n\
|
||||
\ import xgboost\n\n # Loading data\n df = pandas.read_parquet(data_path)\n\
|
||||
\ if label_column_name:\n df = df.drop(columns=[label_column_name])\n\
|
||||
\n evaluation_data = xgboost.DMatrix(\n data=df,\n )\n\n \
|
||||
\ # Training\n model = xgboost.Booster(model_file=model_path)\n\n \
|
||||
\ predictions = model.predict(evaluation_data)\n\n Path(predictions_path).parent.mkdir(parents=True,\
|
||||
\ exist_ok=True)\n numpy.savetxt(predictions_path, predictions)\n\nimport\
|
||||
\ argparse\n_parser = argparse.ArgumentParser(prog='Xgboost predict', description='Make\
|
||||
\ predictions using a trained XGBoost model.\\n\\n Args:\\n data_path:\
|
||||
\ Path for the feature data in Apache Parquet format.\\n model_path:\
|
||||
\ Path for the trained model in binary XGBoost format.\\n predictions_path:\
|
||||
\ Output path for the predictions.\\n label_column_name: Optional.\
|
||||
\ Name of the column containing the label data that is excluded during the\
|
||||
\ prediction.\\n\\n Annotations:\\n author: Alexey Volkov <alexey.volkov@ark-kun.com>')\n\
|
||||
_parser.add_argument(\"--data\", dest=\"data_path\", type=str, required=True,\
|
||||
\ default=argparse.SUPPRESS)\n_parser.add_argument(\"--model\", dest=\"\
|
||||
model_path\", type=str, required=True, default=argparse.SUPPRESS)\n_parser.add_argument(\"\
|
||||
--label-column-name\", dest=\"label_column_name\", type=str, required=False,\
|
||||
\ default=argparse.SUPPRESS)\n_parser.add_argument(\"--predictions\", dest=\"\
|
||||
predictions_path\", type=_make_parent_dirs_and_return_path, required=True,\
|
||||
\ default=argparse.SUPPRESS)\n_parsed_args = vars(_parser.parse_args())\n\
|
||||
\n_outputs = xgboost_predict(**_parsed_args)\n"
|
||||
image: python:3.7
|
||||
exec-xgboost-predict-4:
|
||||
container:
|
||||
args:
|
||||
- --data
|
||||
- '{{$.inputs.artifacts[''data''].path}}'
|
||||
- --model
|
||||
- '{{$.inputs.artifacts[''model''].path}}'
|
||||
- '{"IfPresent": {"InputName": "label_column", "Then": ["--label-column",
|
||||
"{{$.inputs.parameters[''label_column'']}}"]}}'
|
||||
- --predictions
|
||||
- '{{$.outputs.artifacts[''predictions''].path}}'
|
||||
command:
|
||||
- sh
|
||||
- -c
|
||||
- (PIP_DISABLE_PIP_VERSION_CHECK=1 python3 -m pip install --quiet --no-warn-script-location
|
||||
'xgboost==1.1.1' 'pandas==1.0.5' || PIP_DISABLE_PIP_VERSION_CHECK=1 python3
|
||||
-m pip install --quiet --no-warn-script-location 'xgboost==1.1.1' 'pandas==1.0.5'
|
||||
--user) && "$0" "$@"
|
||||
- python3
|
||||
- -u
|
||||
- -c
|
||||
- "def _make_parent_dirs_and_return_path(file_path: str):\n import os\n\
|
||||
\ os.makedirs(os.path.dirname(file_path), exist_ok=True)\n return\
|
||||
\ file_path\n\ndef xgboost_predict(\n data_path, # Also supports LibSVM\n\
|
||||
\ model_path,\n predictions_path,\n label_column = None,\n):\n\
|
||||
\ '''Make predictions using a trained XGBoost model.\n\n Args:\n \
|
||||
\ data_path: Path for the feature data in CSV format.\n model_path:\
|
||||
\ Path for the trained model in binary XGBoost format.\n predictions_path:\
|
||||
\ Output path for the predictions.\n label_column: Column containing\
|
||||
\ the label data.\n\n Annotations:\n author: Alexey Volkov <alexey.volkov@ark-kun.com>\n\
|
||||
\ '''\n from pathlib import Path\n\n import numpy\n import pandas\n\
|
||||
\ import xgboost\n\n df = pandas.read_csv(\n data_path,\n \
|
||||
\ )\n\n if label_column is not None:\n df = df.drop(columns=[df.columns[label_column]])\n\
|
||||
\n testing_data = xgboost.DMatrix(\n data=df,\n )\n\n model\
|
||||
\ = xgboost.Booster(model_file=model_path)\n\n predictions = model.predict(testing_data)\n\
|
||||
\n Path(predictions_path).parent.mkdir(parents=True, exist_ok=True)\n\
|
||||
\ numpy.savetxt(predictions_path, predictions)\n\nimport argparse\n_parser\
|
||||
\ = argparse.ArgumentParser(prog='Xgboost predict', description='Make predictions\
|
||||
\ using a trained XGBoost model.\\n\\n Args:\\n data_path: Path\
|
||||
\ for the feature data in CSV format.\\n model_path: Path for the\
|
||||
\ trained model in binary XGBoost format.\\n predictions_path: Output\
|
||||
\ path for the predictions.\\n label_column: Column containing the\
|
||||
\ label data.\\n\\n Annotations:\\n author: Alexey Volkov <alexey.volkov@ark-kun.com>')\n\
|
||||
_parser.add_argument(\"--data\", dest=\"data_path\", type=str, required=True,\
|
||||
\ default=argparse.SUPPRESS)\n_parser.add_argument(\"--model\", dest=\"\
|
||||
model_path\", type=str, required=True, default=argparse.SUPPRESS)\n_parser.add_argument(\"\
|
||||
--label-column\", dest=\"label_column\", type=int, required=False, default=argparse.SUPPRESS)\n\
|
||||
_parser.add_argument(\"--predictions\", dest=\"predictions_path\", type=_make_parent_dirs_and_return_path,\
|
||||
\ required=True, default=argparse.SUPPRESS)\n_parsed_args = vars(_parser.parse_args())\n\
|
||||
\n_outputs = xgboost_predict(**_parsed_args)\n"
|
||||
image: python:3.7
|
||||
exec-xgboost-train:
|
||||
container:
|
||||
args:
|
||||
- --training-data
|
||||
- '{{$.inputs.artifacts[''training_data''].path}}'
|
||||
- '{"IfPresent": {"InputName": "starting_model", "Then": ["--starting-model",
|
||||
"{{$.inputs.artifacts[''starting_model''].path}}"]}}'
|
||||
- '{"IfPresent": {"InputName": "label_column", "Then": ["--label-column",
|
||||
"{{$.inputs.parameters[''label_column'']}}"]}}'
|
||||
- '{"IfPresent": {"InputName": "num_iterations", "Then": ["--num-iterations",
|
||||
"{{$.inputs.parameters[''num_iterations'']}}"]}}'
|
||||
- '{"IfPresent": {"InputName": "booster_params", "Then": ["--booster-params",
|
||||
"{{$.inputs.parameters[''booster_params'']}}"]}}'
|
||||
- '{"IfPresent": {"InputName": "objective", "Then": ["--objective", "{{$.inputs.parameters[''objective'']}}"]}}'
|
||||
- '{"IfPresent": {"InputName": "booster", "Then": ["--booster", "{{$.inputs.parameters[''booster'']}}"]}}'
|
||||
- '{"IfPresent": {"InputName": "learning_rate", "Then": ["--learning-rate",
|
||||
"{{$.inputs.parameters[''learning_rate'']}}"]}}'
|
||||
- '{"IfPresent": {"InputName": "min_split_loss", "Then": ["--min-split-loss",
|
||||
"{{$.inputs.parameters[''min_split_loss'']}}"]}}'
|
||||
- '{"IfPresent": {"InputName": "max_depth", "Then": ["--max-depth", "{{$.inputs.parameters[''max_depth'']}}"]}}'
|
||||
- --model
|
||||
- '{{$.outputs.artifacts[''model''].path}}'
|
||||
- --model-config
|
||||
- '{{$.outputs.artifacts[''model_config''].path}}'
|
||||
command:
|
||||
- sh
|
||||
- -c
|
||||
- (PIP_DISABLE_PIP_VERSION_CHECK=1 python3 -m pip install --quiet --no-warn-script-location
|
||||
'xgboost==1.1.1' 'pandas==1.0.5' || PIP_DISABLE_PIP_VERSION_CHECK=1 python3
|
||||
-m pip install --quiet --no-warn-script-location 'xgboost==1.1.1' 'pandas==1.0.5'
|
||||
--user) && "$0" "$@"
|
||||
- python3
|
||||
- -u
|
||||
- -c
|
||||
- "def _make_parent_dirs_and_return_path(file_path: str):\n import os\n\
|
||||
\ os.makedirs(os.path.dirname(file_path), exist_ok=True)\n return\
|
||||
\ file_path\n\ndef xgboost_train(\n training_data_path, # Also supports\
|
||||
\ LibSVM\n model_path,\n model_config_path,\n starting_model_path\
|
||||
\ = None,\n\n label_column = 0,\n num_iterations = 10,\n booster_params\
|
||||
\ = None,\n\n # Booster parameters\n objective = 'reg:squarederror',\n\
|
||||
\ booster = 'gbtree',\n learning_rate = 0.3,\n min_split_loss =\
|
||||
\ 0,\n max_depth = 6,\n):\n '''Train an XGBoost model.\n\n Args:\n\
|
||||
\ training_data_path: Path for the training data in CSV format.\n\
|
||||
\ model_path: Output path for the trained model in binary XGBoost\
|
||||
\ format.\n model_config_path: Output path for the internal parameter\
|
||||
\ configuration of Booster as a JSON string.\n starting_model_path:\
|
||||
\ Path for the existing trained model to start from.\n label_column:\
|
||||
\ Column containing the label data.\n num_boost_rounds: Number of\
|
||||
\ boosting iterations.\n booster_params: Parameters for the booster.\
|
||||
\ See https://xgboost.readthedocs.io/en/latest/parameter.html\n objective:\
|
||||
\ The learning task and the corresponding learning objective.\n \
|
||||
\ See https://xgboost.readthedocs.io/en/latest/parameter.html#learning-task-parameters\n\
|
||||
\ The most common values are:\n \"reg:squarederror\"\
|
||||
\ - Regression with squared loss (default).\n \"reg:logistic\"\
|
||||
\ - Logistic regression.\n \"binary:logistic\" - Logistic regression\
|
||||
\ for binary classification, output probability.\n \"binary:logitraw\"\
|
||||
\ - Logistic regression for binary classification, output score before logistic\
|
||||
\ transformation\n \"rank:pairwise\" - Use LambdaMART to perform\
|
||||
\ pairwise ranking where the pairwise loss is minimized\n \"\
|
||||
rank:ndcg\" - Use LambdaMART to perform list-wise ranking where Normalized\
|
||||
\ Discounted Cumulative Gain (NDCG) is maximized\n\n Annotations:\n \
|
||||
\ author: Alexey Volkov <alexey.volkov@ark-kun.com>\n '''\n \
|
||||
\ import pandas\n import xgboost\n\n df = pandas.read_csv(\n \
|
||||
\ training_data_path,\n )\n\n training_data = xgboost.DMatrix(\n\
|
||||
\ data=df.drop(columns=[df.columns[label_column]]),\n label=df[df.columns[label_column]],\n\
|
||||
\ )\n\n booster_params = booster_params or {}\n booster_params.setdefault('objective',\
|
||||
\ objective)\n booster_params.setdefault('booster', booster)\n booster_params.setdefault('learning_rate',\
|
||||
\ learning_rate)\n booster_params.setdefault('min_split_loss', min_split_loss)\n\
|
||||
\ booster_params.setdefault('max_depth', max_depth)\n\n starting_model\
|
||||
\ = None\n if starting_model_path:\n starting_model = xgboost.Booster(model_file=starting_model_path)\n\
|
||||
\n model = xgboost.train(\n params=booster_params,\n dtrain=training_data,\n\
|
||||
\ num_boost_round=num_iterations,\n xgb_model=starting_model\n\
|
||||
\ )\n\n # Saving the model in binary format\n model.save_model(model_path)\n\
|
||||
\n model_config_str = model.save_config()\n with open(model_config_path,\
|
||||
\ 'w') as model_config_file:\n model_config_file.write(model_config_str)\n\
|
||||
\nimport json\nimport argparse\n_parser = argparse.ArgumentParser(prog='Xgboost\
|
||||
\ train', description='Train an XGBoost model.\\n\\n Args:\\n \
|
||||
\ training_data_path: Path for the training data in CSV format.\\n \
|
||||
\ model_path: Output path for the trained model in binary XGBoost format.\\\
|
||||
n model_config_path: Output path for the internal parameter configuration\
|
||||
\ of Booster as a JSON string.\\n starting_model_path: Path for the\
|
||||
\ existing trained model to start from.\\n label_column: Column containing\
|
||||
\ the label data.\\n num_boost_rounds: Number of boosting iterations.\\\
|
||||
n booster_params: Parameters for the booster. See https://xgboost.readthedocs.io/en/latest/parameter.html\\\
|
||||
n objective: The learning task and the corresponding learning objective.\\\
|
||||
n See https://xgboost.readthedocs.io/en/latest/parameter.html#learning-task-parameters\\\
|
||||
n The most common values are:\\n \"reg:squarederror\"\
|
||||
\ - Regression with squared loss (default).\\n \"reg:logistic\"\
|
||||
\ - Logistic regression.\\n \"binary:logistic\" - Logistic regression\
|
||||
\ for binary classification, output probability.\\n \"binary:logitraw\"\
|
||||
\ - Logistic regression for binary classification, output score before logistic\
|
||||
\ transformation\\n \"rank:pairwise\" - Use LambdaMART to perform\
|
||||
\ pairwise ranking where the pairwise loss is minimized\\n \"\
|
||||
rank:ndcg\" - Use LambdaMART to perform list-wise ranking where Normalized\
|
||||
\ Discounted Cumulative Gain (NDCG) is maximized\\n\\n Annotations:\\\
|
||||
n author: Alexey Volkov <alexey.volkov@ark-kun.com>')\n_parser.add_argument(\"\
|
||||
--training-data\", dest=\"training_data_path\", type=str, required=True,\
|
||||
\ default=argparse.SUPPRESS)\n_parser.add_argument(\"--starting-model\"\
|
||||
, dest=\"starting_model_path\", type=str, required=False, default=argparse.SUPPRESS)\n\
|
||||
_parser.add_argument(\"--label-column\", dest=\"label_column\", type=int,\
|
||||
\ required=False, default=argparse.SUPPRESS)\n_parser.add_argument(\"--num-iterations\"\
|
||||
, dest=\"num_iterations\", type=int, required=False, default=argparse.SUPPRESS)\n\
|
||||
_parser.add_argument(\"--booster-params\", dest=\"booster_params\", type=json.loads,\
|
||||
\ required=False, default=argparse.SUPPRESS)\n_parser.add_argument(\"--objective\"\
|
||||
, dest=\"objective\", type=str, required=False, default=argparse.SUPPRESS)\n\
|
||||
_parser.add_argument(\"--booster\", dest=\"booster\", type=str, required=False,\
|
||||
\ default=argparse.SUPPRESS)\n_parser.add_argument(\"--learning-rate\",\
|
||||
\ dest=\"learning_rate\", type=float, required=False, default=argparse.SUPPRESS)\n\
|
||||
_parser.add_argument(\"--min-split-loss\", dest=\"min_split_loss\", type=float,\
|
||||
\ required=False, default=argparse.SUPPRESS)\n_parser.add_argument(\"--max-depth\"\
|
||||
, dest=\"max_depth\", type=int, required=False, default=argparse.SUPPRESS)\n\
|
||||
_parser.add_argument(\"--model\", dest=\"model_path\", type=_make_parent_dirs_and_return_path,\
|
||||
\ required=True, default=argparse.SUPPRESS)\n_parser.add_argument(\"--model-config\"\
|
||||
, dest=\"model_config_path\", type=_make_parent_dirs_and_return_path, required=True,\
|
||||
\ default=argparse.SUPPRESS)\n_parsed_args = vars(_parser.parse_args())\n\
|
||||
\n_outputs = xgboost_train(**_parsed_args)\n"
|
||||
image: python:3.7
|
||||
exec-xgboost-train-2:
|
||||
container:
|
||||
args:
|
||||
- --training-data
|
||||
- '{{$.inputs.artifacts[''training_data''].path}}'
|
||||
- --label-column-name
|
||||
- '{{$.inputs.parameters[''label_column_name'']}}'
|
||||
- '{"IfPresent": {"InputName": "starting_model", "Then": ["--starting-model",
|
||||
"{{$.inputs.artifacts[''starting_model''].path}}"]}}'
|
||||
- '{"IfPresent": {"InputName": "num_iterations", "Then": ["--num-iterations",
|
||||
"{{$.inputs.parameters[''num_iterations'']}}"]}}'
|
||||
- '{"IfPresent": {"InputName": "booster_params", "Then": ["--booster-params",
|
||||
"{{$.inputs.parameters[''booster_params'']}}"]}}'
|
||||
- '{"IfPresent": {"InputName": "objective", "Then": ["--objective", "{{$.inputs.parameters[''objective'']}}"]}}'
|
||||
- '{"IfPresent": {"InputName": "booster", "Then": ["--booster", "{{$.inputs.parameters[''booster'']}}"]}}'
|
||||
- '{"IfPresent": {"InputName": "learning_rate", "Then": ["--learning-rate",
|
||||
"{{$.inputs.parameters[''learning_rate'']}}"]}}'
|
||||
- '{"IfPresent": {"InputName": "min_split_loss", "Then": ["--min-split-loss",
|
||||
"{{$.inputs.parameters[''min_split_loss'']}}"]}}'
|
||||
- '{"IfPresent": {"InputName": "max_depth", "Then": ["--max-depth", "{{$.inputs.parameters[''max_depth'']}}"]}}'
|
||||
- --model
|
||||
- '{{$.outputs.artifacts[''model''].path}}'
|
||||
- --model-config
|
||||
- '{{$.outputs.artifacts[''model_config''].path}}'
|
||||
command:
|
||||
- sh
|
||||
- -c
|
||||
- (PIP_DISABLE_PIP_VERSION_CHECK=1 python3 -m pip install --quiet --no-warn-script-location
|
||||
'xgboost==1.1.1' 'pandas==1.0.5' 'pyarrow==0.17.1' || PIP_DISABLE_PIP_VERSION_CHECK=1
|
||||
python3 -m pip install --quiet --no-warn-script-location 'xgboost==1.1.1'
|
||||
'pandas==1.0.5' 'pyarrow==0.17.1' --user) && "$0" "$@"
|
||||
- python3
|
||||
- -u
|
||||
- -c
|
||||
- "def _make_parent_dirs_and_return_path(file_path: str):\n import os\n\
|
||||
\ os.makedirs(os.path.dirname(file_path), exist_ok=True)\n return\
|
||||
\ file_path\n\ndef xgboost_train(\n training_data_path,\n model_path,\n\
|
||||
\ model_config_path,\n label_column_name,\n\n starting_model_path\
|
||||
\ = None,\n\n num_iterations = 10,\n booster_params = None,\n\n \
|
||||
\ # Booster parameters\n objective = 'reg:squarederror',\n booster\
|
||||
\ = 'gbtree',\n learning_rate = 0.3,\n min_split_loss = 0,\n max_depth\
|
||||
\ = 6,\n):\n '''Train an XGBoost model.\n\n Args:\n training_data_path:\
|
||||
\ Path for the training data in Apache Parquet format.\n model_path:\
|
||||
\ Output path for the trained model in binary XGBoost format.\n model_config_path:\
|
||||
\ Output path for the internal parameter configuration of Booster as a JSON\
|
||||
\ string.\n starting_model_path: Path for the existing trained model\
|
||||
\ to start from.\n label_column_name: Name of the column containing\
|
||||
\ the label data.\n num_boost_rounds: Number of boosting iterations.\n\
|
||||
\ booster_params: Parameters for the booster. See https://xgboost.readthedocs.io/en/latest/parameter.html\n\
|
||||
\ objective: The learning task and the corresponding learning objective.\n\
|
||||
\ See https://xgboost.readthedocs.io/en/latest/parameter.html#learning-task-parameters\n\
|
||||
\ The most common values are:\n \"reg:squarederror\"\
|
||||
\ - Regression with squared loss (default).\n \"reg:logistic\"\
|
||||
\ - Logistic regression.\n \"binary:logistic\" - Logistic regression\
|
||||
\ for binary classification, output probability.\n \"binary:logitraw\"\
|
||||
\ - Logistic regression for binary classification, output score before logistic\
|
||||
\ transformation\n \"rank:pairwise\" - Use LambdaMART to perform\
|
||||
\ pairwise ranking where the pairwise loss is minimized\n \"\
|
||||
rank:ndcg\" - Use LambdaMART to perform list-wise ranking where Normalized\
|
||||
\ Discounted Cumulative Gain (NDCG) is maximized\n\n Annotations:\n \
|
||||
\ author: Alexey Volkov <alexey.volkov@ark-kun.com>\n '''\n \
|
||||
\ import pandas\n import xgboost\n\n # Loading data\n df = pandas.read_parquet(training_data_path)\n\
|
||||
\ training_data = xgboost.DMatrix(\n data=df.drop(columns=[label_column_name]),\n\
|
||||
\ label=df[[label_column_name]],\n )\n # Training\n booster_params\
|
||||
\ = booster_params or {}\n booster_params.setdefault('objective', objective)\n\
|
||||
\ booster_params.setdefault('booster', booster)\n booster_params.setdefault('learning_rate',\
|
||||
\ learning_rate)\n booster_params.setdefault('min_split_loss', min_split_loss)\n\
|
||||
\ booster_params.setdefault('max_depth', max_depth)\n\n starting_model\
|
||||
\ = None\n if starting_model_path:\n starting_model = xgboost.Booster(model_file=starting_model_path)\n\
|
||||
\n model = xgboost.train(\n params=booster_params,\n dtrain=training_data,\n\
|
||||
\ num_boost_round=num_iterations,\n xgb_model=starting_model\n\
|
||||
\ )\n\n # Saving the model in binary format\n model.save_model(model_path)\n\
|
||||
\n model_config_str = model.save_config()\n with open(model_config_path,\
|
||||
\ 'w') as model_config_file:\n model_config_file.write(model_config_str)\n\
|
||||
\nimport json\nimport argparse\n_parser = argparse.ArgumentParser(prog='Xgboost\
|
||||
\ train', description='Train an XGBoost model.\\n\\n Args:\\n \
|
||||
\ training_data_path: Path for the training data in Apache Parquet format.\\\
|
||||
n model_path: Output path for the trained model in binary XGBoost\
|
||||
\ format.\\n model_config_path: Output path for the internal parameter\
|
||||
\ configuration of Booster as a JSON string.\\n starting_model_path:\
|
||||
\ Path for the existing trained model to start from.\\n label_column_name:\
|
||||
\ Name of the column containing the label data.\\n num_boost_rounds:\
|
||||
\ Number of boosting iterations.\\n booster_params: Parameters for\
|
||||
\ the booster. See https://xgboost.readthedocs.io/en/latest/parameter.html\\\
|
||||
n objective: The learning task and the corresponding learning objective.\\\
|
||||
n See https://xgboost.readthedocs.io/en/latest/parameter.html#learning-task-parameters\\\
|
||||
n The most common values are:\\n \"reg:squarederror\"\
|
||||
\ - Regression with squared loss (default).\\n \"reg:logistic\"\
|
||||
\ - Logistic regression.\\n \"binary:logistic\" - Logistic regression\
|
||||
\ for binary classification, output probability.\\n \"binary:logitraw\"\
|
||||
\ - Logistic regression for binary classification, output score before logistic\
|
||||
\ transformation\\n \"rank:pairwise\" - Use LambdaMART to perform\
|
||||
\ pairwise ranking where the pairwise loss is minimized\\n \"\
|
||||
rank:ndcg\" - Use LambdaMART to perform list-wise ranking where Normalized\
|
||||
\ Discounted Cumulative Gain (NDCG) is maximized\\n\\n Annotations:\\\
|
||||
n author: Alexey Volkov <alexey.volkov@ark-kun.com>')\n_parser.add_argument(\"\
|
||||
--training-data\", dest=\"training_data_path\", type=str, required=True,\
|
||||
\ default=argparse.SUPPRESS)\n_parser.add_argument(\"--label-column-name\"\
|
||||
, dest=\"label_column_name\", type=str, required=True, default=argparse.SUPPRESS)\n\
|
||||
_parser.add_argument(\"--starting-model\", dest=\"starting_model_path\"\
|
||||
, type=str, required=False, default=argparse.SUPPRESS)\n_parser.add_argument(\"\
|
||||
--num-iterations\", dest=\"num_iterations\", type=int, required=False, default=argparse.SUPPRESS)\n\
|
||||
_parser.add_argument(\"--booster-params\", dest=\"booster_params\", type=json.loads,\
|
||||
\ required=False, default=argparse.SUPPRESS)\n_parser.add_argument(\"--objective\"\
|
||||
, dest=\"objective\", type=str, required=False, default=argparse.SUPPRESS)\n\
|
||||
_parser.add_argument(\"--booster\", dest=\"booster\", type=str, required=False,\
|
||||
\ default=argparse.SUPPRESS)\n_parser.add_argument(\"--learning-rate\",\
|
||||
\ dest=\"learning_rate\", type=float, required=False, default=argparse.SUPPRESS)\n\
|
||||
_parser.add_argument(\"--min-split-loss\", dest=\"min_split_loss\", type=float,\
|
||||
\ required=False, default=argparse.SUPPRESS)\n_parser.add_argument(\"--max-depth\"\
|
||||
, dest=\"max_depth\", type=int, required=False, default=argparse.SUPPRESS)\n\
|
||||
_parser.add_argument(\"--model\", dest=\"model_path\", type=_make_parent_dirs_and_return_path,\
|
||||
\ required=True, default=argparse.SUPPRESS)\n_parser.add_argument(\"--model-config\"\
|
||||
, dest=\"model_config_path\", type=_make_parent_dirs_and_return_path, required=True,\
|
||||
\ default=argparse.SUPPRESS)\n_parsed_args = vars(_parser.parse_args())\n\
|
||||
\n_outputs = xgboost_train(**_parsed_args)\n"
|
||||
image: python:3.7
|
||||
pipelineInfo:
|
||||
name: xgboost-sample-pipeline
|
||||
root:
|
||||
dag:
|
||||
tasks:
|
||||
chicago-taxi-trips-dataset:
|
||||
cachingOptions:
|
||||
enableCache: true
|
||||
componentRef:
|
||||
name: comp-chicago-taxi-trips-dataset
|
||||
inputs:
|
||||
parameters:
|
||||
limit:
|
||||
runtimeValue:
|
||||
constant: 10000.0
|
||||
select:
|
||||
runtimeValue:
|
||||
constant: tips,trip_seconds,trip_miles,pickup_community_area,dropoff_community_area,fare,tolls,extras,trip_total
|
||||
where:
|
||||
runtimeValue:
|
||||
constant: trip_start_timestamp >= "2019-01-01" AND trip_start_timestamp
|
||||
< "2019-02-01"
|
||||
taskInfo:
|
||||
name: chicago-taxi-trips-dataset
|
||||
convert-csv-to-apache-parquet:
|
||||
cachingOptions:
|
||||
enableCache: true
|
||||
componentRef:
|
||||
name: comp-convert-csv-to-apache-parquet
|
||||
dependentTasks:
|
||||
- chicago-taxi-trips-dataset
|
||||
inputs:
|
||||
artifacts:
|
||||
data:
|
||||
taskOutputArtifact:
|
||||
outputArtifactKey: table
|
||||
producerTask: chicago-taxi-trips-dataset
|
||||
taskInfo:
|
||||
name: convert-csv-to-apache-parquet
|
||||
xgboost-predict:
|
||||
cachingOptions:
|
||||
enableCache: true
|
||||
componentRef:
|
||||
name: comp-xgboost-predict
|
||||
dependentTasks:
|
||||
- chicago-taxi-trips-dataset
|
||||
- xgboost-train
|
||||
inputs:
|
||||
artifacts:
|
||||
data:
|
||||
taskOutputArtifact:
|
||||
outputArtifactKey: table
|
||||
producerTask: chicago-taxi-trips-dataset
|
||||
model:
|
||||
taskOutputArtifact:
|
||||
outputArtifactKey: model
|
||||
producerTask: xgboost-train
|
||||
parameters:
|
||||
label_column:
|
||||
runtimeValue:
|
||||
constant: 0.0
|
||||
taskInfo:
|
||||
name: xgboost-predict
|
||||
xgboost-predict-2:
|
||||
cachingOptions:
|
||||
enableCache: true
|
||||
componentRef:
|
||||
name: comp-xgboost-predict-2
|
||||
dependentTasks:
|
||||
- convert-csv-to-apache-parquet
|
||||
- xgboost-train-2
|
||||
inputs:
|
||||
artifacts:
|
||||
data:
|
||||
taskOutputArtifact:
|
||||
outputArtifactKey: output_data
|
||||
producerTask: convert-csv-to-apache-parquet
|
||||
model:
|
||||
taskOutputArtifact:
|
||||
outputArtifactKey: model
|
||||
producerTask: xgboost-train-2
|
||||
parameters:
|
||||
label_column_name:
|
||||
runtimeValue:
|
||||
constant: tips
|
||||
taskInfo:
|
||||
name: xgboost-predict-2
|
||||
xgboost-predict-3:
|
||||
cachingOptions:
|
||||
enableCache: true
|
||||
componentRef:
|
||||
name: comp-xgboost-predict-3
|
||||
dependentTasks:
|
||||
- convert-csv-to-apache-parquet
|
||||
- xgboost-train
|
||||
inputs:
|
||||
artifacts:
|
||||
data:
|
||||
taskOutputArtifact:
|
||||
outputArtifactKey: output_data
|
||||
producerTask: convert-csv-to-apache-parquet
|
||||
model:
|
||||
taskOutputArtifact:
|
||||
outputArtifactKey: model
|
||||
producerTask: xgboost-train
|
||||
parameters:
|
||||
label_column_name:
|
||||
runtimeValue:
|
||||
constant: tips
|
||||
taskInfo:
|
||||
name: xgboost-predict-3
|
||||
xgboost-predict-4:
|
||||
cachingOptions:
|
||||
enableCache: true
|
||||
componentRef:
|
||||
name: comp-xgboost-predict-4
|
||||
dependentTasks:
|
||||
- chicago-taxi-trips-dataset
|
||||
- xgboost-train-2
|
||||
inputs:
|
||||
artifacts:
|
||||
data:
|
||||
taskOutputArtifact:
|
||||
outputArtifactKey: table
|
||||
producerTask: chicago-taxi-trips-dataset
|
||||
model:
|
||||
taskOutputArtifact:
|
||||
outputArtifactKey: model
|
||||
producerTask: xgboost-train-2
|
||||
parameters:
|
||||
label_column:
|
||||
runtimeValue:
|
||||
constant: 0.0
|
||||
taskInfo:
|
||||
name: xgboost-predict-4
|
||||
xgboost-train:
|
||||
cachingOptions:
|
||||
enableCache: true
|
||||
componentRef:
|
||||
name: comp-xgboost-train
|
||||
dependentTasks:
|
||||
- chicago-taxi-trips-dataset
|
||||
inputs:
|
||||
artifacts:
|
||||
training_data:
|
||||
taskOutputArtifact:
|
||||
outputArtifactKey: table
|
||||
producerTask: chicago-taxi-trips-dataset
|
||||
parameters:
|
||||
label_column:
|
||||
runtimeValue:
|
||||
constant: 0.0
|
||||
num_iterations:
|
||||
runtimeValue:
|
||||
constant: 200.0
|
||||
objective:
|
||||
runtimeValue:
|
||||
constant: reg:squarederror
|
||||
taskInfo:
|
||||
name: xgboost-train
|
||||
xgboost-train-2:
|
||||
cachingOptions:
|
||||
enableCache: true
|
||||
componentRef:
|
||||
name: comp-xgboost-train-2
|
||||
dependentTasks:
|
||||
- convert-csv-to-apache-parquet
|
||||
inputs:
|
||||
artifacts:
|
||||
training_data:
|
||||
taskOutputArtifact:
|
||||
outputArtifactKey: output_data
|
||||
producerTask: convert-csv-to-apache-parquet
|
||||
parameters:
|
||||
label_column_name:
|
||||
runtimeValue:
|
||||
constant: tips
|
||||
num_iterations:
|
||||
runtimeValue:
|
||||
constant: 200.0
|
||||
objective:
|
||||
runtimeValue:
|
||||
constant: reg:squarederror
|
||||
taskInfo:
|
||||
name: xgboost-train-2
|
||||
schemaVersion: 2.1.0
|
||||
sdkVersion: kfp-2.7.0
|
Loading…
Reference in New Issue