# File-viewer header captured with the source (not part of the pipeline spec): 870 lines, 45 KiB, YAML
components:
|
|
# Interface of the Chicago Taxi Trips download step: four query parameters in,
# one `table` artifact out. The work is done by the executor named below.
comp-chicago-taxi-trips-dataset:
  executorLabel: exec-chicago-taxi-trips-dataset
  inputDefinitions:
    parameters:
      # The executor script appends this value to the dataset URL as its extension.
      format:
        defaultValue: csv
        parameterType: STRING
      # NOTE(review): the default is a quoted string while parameterType is
      # NUMBER_INTEGER — confirm the consumer coerces it.
      limit:
        defaultValue: "1000"
        parameterType: NUMBER_INTEGER
      # Comma-separated list of columns requested from the dataset.
      select:
        defaultValue: trip_id,taxi_id,trip_start_timestamp,trip_end_timestamp,trip_seconds,trip_miles,pickup_census_tract,dropoff_census_tract,pickup_community_area,dropoff_community_area,fare,tips,tolls,extras,trip_total,payment_type,company,pickup_centroid_latitude,pickup_centroid_longitude,pickup_centroid_location,dropoff_centroid_latitude,dropoff_centroid_longitude,dropoff_centroid_location
        parameterType: STRING
      # Row filter; the default range spans 1900-01-01 up to (excluding) 2100-01-01.
      where:
        defaultValue: 'trip_start_timestamp>="1900-01-01" AND trip_start_timestamp<"2100-01-01"'
        parameterType: STRING
  outputDefinitions:
    artifacts:
      table:
        artifactType: {schemaTitle: system.Artifact, schemaVersion: 0.0.1}
|
|
# Interface of the CSV-to-Parquet conversion step: one input artifact (`data`)
# and one output artifact (`output_data`), both generic system.Artifact.
comp-convert-csv-to-apache-parquet:
  executorLabel: exec-convert-csv-to-apache-parquet
  inputDefinitions:
    artifacts:
      data:
        artifactType: {schemaTitle: system.Artifact, schemaVersion: 0.0.1}
  outputDefinitions:
    artifacts:
      output_data:
        artifactType: {schemaTitle: system.Artifact, schemaVersion: 0.0.1}
|
|
# Interface of the prediction step that addresses the label column by integer
# index: `data` and `model` artifacts in, `predictions` artifact out.
comp-xgboost-predict:
  executorLabel: exec-xgboost-predict
  inputDefinitions:
    artifacts:
      data:
        artifactType: {schemaTitle: system.Artifact, schemaVersion: 0.0.1}
      model:
        artifactType: {schemaTitle: system.Artifact, schemaVersion: 0.0.1}
    parameters:
      # No default is declared for this parameter.
      label_column:
        parameterType: NUMBER_INTEGER
  outputDefinitions:
    artifacts:
      predictions:
        artifactType: {schemaTitle: system.Artifact, schemaVersion: 0.0.1}
|
|
# Interface of the prediction step that addresses the label column by name:
# `data` and `model` artifacts in, `predictions` artifact out.
comp-xgboost-predict-2:
  executorLabel: exec-xgboost-predict-2
  inputDefinitions:
    artifacts:
      data:
        artifactType: {schemaTitle: system.Artifact, schemaVersion: 0.0.1}
      model:
        artifactType: {schemaTitle: system.Artifact, schemaVersion: 0.0.1}
    parameters:
      # No default is declared for this parameter.
      label_column_name:
        parameterType: STRING
  outputDefinitions:
    artifacts:
      predictions:
        artifactType: {schemaTitle: system.Artifact, schemaVersion: 0.0.1}
|
|
# Interface of a second name-based prediction step; it declares the same inputs
# and outputs as comp-xgboost-predict-2 but binds to its own executor.
comp-xgboost-predict-3:
  executorLabel: exec-xgboost-predict-3
  inputDefinitions:
    artifacts:
      data:
        artifactType: {schemaTitle: system.Artifact, schemaVersion: 0.0.1}
      model:
        artifactType: {schemaTitle: system.Artifact, schemaVersion: 0.0.1}
    parameters:
      # No default is declared for this parameter.
      label_column_name:
        parameterType: STRING
  outputDefinitions:
    artifacts:
      predictions:
        artifactType: {schemaTitle: system.Artifact, schemaVersion: 0.0.1}
|
|
# Interface of a second index-based prediction step; it declares the same
# inputs and outputs as comp-xgboost-predict but binds to its own executor.
comp-xgboost-predict-4:
  executorLabel: exec-xgboost-predict-4
  inputDefinitions:
    artifacts:
      data:
        artifactType: {schemaTitle: system.Artifact, schemaVersion: 0.0.1}
      model:
        artifactType: {schemaTitle: system.Artifact, schemaVersion: 0.0.1}
    parameters:
      # No default is declared for this parameter.
      label_column:
        parameterType: NUMBER_INTEGER
  outputDefinitions:
    artifacts:
      predictions:
        artifactType: {schemaTitle: system.Artifact, schemaVersion: 0.0.1}
|
|
# Interface of the training step that addresses the label column by integer
# index: `training_data` artifact in, `model` and `model_config` artifacts out.
# NOTE(review): every numeric default below is a quoted string while its
# parameterType is NUMBER_INTEGER / NUMBER_DOUBLE — confirm the consumer
# coerces these.
comp-xgboost-train:
  executorLabel: exec-xgboost-train
  inputDefinitions:
    artifacts:
      training_data:
        artifactType: {schemaTitle: system.Artifact, schemaVersion: 0.0.1}
    parameters:
      booster:
        defaultValue: gbtree
        parameterType: STRING
      label_column:
        defaultValue: "0"
        parameterType: NUMBER_INTEGER
      learning_rate:
        defaultValue: "0.3"
        parameterType: NUMBER_DOUBLE
      max_depth:
        defaultValue: "6"
        parameterType: NUMBER_INTEGER
      min_split_loss:
        defaultValue: "0"
        parameterType: NUMBER_DOUBLE
      num_iterations:
        defaultValue: "10"
        parameterType: NUMBER_INTEGER
      objective:
        defaultValue: 'reg:squarederror'
        parameterType: STRING
  outputDefinitions:
    artifacts:
      model:
        artifactType: {schemaTitle: system.Artifact, schemaVersion: 0.0.1}
      model_config:
        artifactType: {schemaTitle: system.Artifact, schemaVersion: 0.0.1}
|
|
# Interface of the training step that addresses the label column by name:
# `training_data` artifact in, `model` and `model_config` artifacts out.
# NOTE(review): every numeric default below is a quoted string while its
# parameterType is NUMBER_INTEGER / NUMBER_DOUBLE — confirm the consumer
# coerces these.
comp-xgboost-train-2:
  executorLabel: exec-xgboost-train-2
  inputDefinitions:
    artifacts:
      training_data:
        artifactType: {schemaTitle: system.Artifact, schemaVersion: 0.0.1}
    parameters:
      booster:
        defaultValue: gbtree
        parameterType: STRING
      # The only parameter here without a default.
      label_column_name:
        parameterType: STRING
      learning_rate:
        defaultValue: "0.3"
        parameterType: NUMBER_DOUBLE
      max_depth:
        defaultValue: "6"
        parameterType: NUMBER_INTEGER
      min_split_loss:
        defaultValue: "0"
        parameterType: NUMBER_DOUBLE
      num_iterations:
        defaultValue: "10"
        parameterType: NUMBER_INTEGER
      objective:
        defaultValue: 'reg:squarederror'
        parameterType: STRING
  outputDefinitions:
    artifacts:
      model:
        artifactType: {schemaTitle: system.Artifact, schemaVersion: 0.0.1}
      model_config:
        artifactType: {schemaTitle: system.Artifact, schemaVersion: 0.0.1}
|
|
# NOTE(review): "dummy_root" reads like a placeholder pipeline root — confirm it is overridden at run time.
defaultPipelineRoot: dummy_root
|
|
# Container executor definitions; each key under `executors` matches an
# `executorLabel` declared by a component in the `components` section.
deploymentSpec:
|
|
executors:
|
|
# Downloads rows of the Chicago Taxi Trips dataset with curl and writes them to
# the `table` output artifact, stripping every double-quote character on the way.
exec-chicago-taxi-trips-dataset:
  container:
    command:
      - sh
      - -c
      # Positional arguments consumed by the script (supplied by the list
      # entries that follow it):
      #   $0 = output path, $1 = select, $2 = where, $3 = limit, $4 = format.
      # `format` becomes the file extension of the dataset URL, so it must be
      # passed; without it the URL ends in a bare "wrvz-psew.".
      - |
        set -e -x -o pipefail
        output_path="$0"
        select="$1"
        where="$2"
        limit="$3"
        format="$4"
        mkdir -p "$(dirname "$output_path")"
        curl --get 'https://data.cityofchicago.org/resource/wrvz-psew.'"${format}" \
            --data-urlencode '$limit='"${limit}" \
            --data-urlencode '$where='"${where}" \
            --data-urlencode '$select='"${select}" \
            | tr -d '"' > "$output_path"   # Removing unneeded quotes around all numbers
      - "{{$.outputs.artifacts['table'].path}}"
      - "{{$.inputs.parameters['select']}}"
      - "{{$.inputs.parameters['where']}}"
      - "{{$.inputs.parameters['limit']}}"
      # Previously missing: the script reads "$4" but only four values were passed.
      - "{{$.inputs.parameters['format']}}"
    image: byrnedo/alpine-curl@sha256:548379d0a4a0c08b9e55d9d87a592b7d35d9ab3037f4936f5ccd09d0b625a342
|
|
# Converts the `data` input artifact (CSV) into the `output_data` artifact
# (Apache Parquet) using pyarrow inside a python:3.7 container.
exec-convert-csv-to-apache-parquet:
  container:
    args:
      - --data
      - "{{$.inputs.artifacts['data'].path}}"
      - --output-data
      - "{{$.outputs.artifacts['output_data'].path}}"
    command:
      - sh
      - -c
      # Installs pyarrow (retrying with --user if the first attempt fails), then
      # executes the remaining command entries plus `args` via "$0" "$@".
      - >-
        (PIP_DISABLE_PIP_VERSION_CHECK=1 python3 -m pip install --quiet --no-warn-script-location
        'pyarrow==0.17.1' || PIP_DISABLE_PIP_VERSION_CHECK=1 python3 -m pip install
        --quiet --no-warn-script-location 'pyarrow==0.17.1' --user) && "$0" "$@"
      - python3
      - -u
      - -c
      # Inline program. Kept as a literal block so the Python indentation is
      # preserved exactly; nested bodies (try/except/with) must stay indented.
      - |
        def _make_parent_dirs_and_return_path(file_path: str):
            import os
            os.makedirs(os.path.dirname(file_path), exist_ok=True)
            return file_path

        def convert_csv_to_apache_parquet(
            data_path,
            output_data_path,
        ):
            '''Converts CSV table to Apache Parquet.

            [Apache Parquet](https://parquet.apache.org/)

            Annotations:
                author: Alexey Volkov <alexey.volkov@ark-kun.com>
            '''
            from pyarrow import csv, parquet

            table = csv.read_csv(data_path)
            parquet.write_table(table, output_data_path)

        import argparse
        _parser = argparse.ArgumentParser(prog='Convert csv to apache parquet', description='Converts CSV table to Apache Parquet.\n\n    [Apache Parquet](https://parquet.apache.org/)\n\n    Annotations:\n        author: Alexey Volkov <alexey.volkov@ark-kun.com>')
        _parser.add_argument("--data", dest="data_path", type=str, required=True, default=argparse.SUPPRESS)
        _parser.add_argument("--output-data", dest="output_data_path", type=_make_parent_dirs_and_return_path, required=True, default=argparse.SUPPRESS)
        _parsed_args = vars(_parser.parse_args())
        _output_files = _parsed_args.pop("_output_paths", [])

        _outputs = convert_csv_to_apache_parquet(**_parsed_args)

        _output_serializers = [

        ]

        import os
        for idx, output_file in enumerate(_output_files):
            try:
                os.makedirs(os.path.dirname(output_file))
            except OSError:
                pass
            with open(output_file, 'w') as f:
                f.write(_output_serializers[idx](_outputs[idx]))
    image: python:3.7
|
|
# Scores CSV feature data with a saved XGBoost model and writes the predictions
# with numpy.savetxt. The label column, when given, is dropped by integer index.
exec-xgboost-predict:
  container:
    args:
      - --data
      - "{{$.inputs.artifacts['data'].path}}"
      - --model
      - "{{$.inputs.artifacts['model'].path}}"
      - --label-column
      - "{{$.inputs.parameters['label_column']}}"
      - --predictions
      - "{{$.outputs.artifacts['predictions'].path}}"
    command:
      - sh
      - -c
      # Installs xgboost and pandas (retrying with --user if the first attempt
      # fails), then executes the remaining command entries plus `args`.
      - >-
        (PIP_DISABLE_PIP_VERSION_CHECK=1 python3 -m pip install --quiet --no-warn-script-location
        'xgboost==1.1.1' 'pandas==1.0.5' || PIP_DISABLE_PIP_VERSION_CHECK=1 python3
        -m pip install --quiet --no-warn-script-location 'xgboost==1.1.1' 'pandas==1.0.5'
        --user) && "$0" "$@"
      - python3
      - -u
      - -c
      # Inline program. Kept as a literal block so the Python indentation is
      # preserved exactly; the `if` body must stay indented.
      - |
        def _make_parent_dirs_and_return_path(file_path: str):
            import os
            os.makedirs(os.path.dirname(file_path), exist_ok=True)
            return file_path

        def xgboost_predict(
            data_path,  # Also supports LibSVM
            model_path,
            predictions_path,
            label_column = None,
        ):
            '''Make predictions using a trained XGBoost model.

            Args:
                data_path: Path for the feature data in CSV format.
                model_path: Path for the trained model in binary XGBoost format.
                predictions_path: Output path for the predictions.
                label_column: Column containing the label data.

            Annotations:
                author: Alexey Volkov <alexey.volkov@ark-kun.com>
            '''
            from pathlib import Path

            import numpy
            import pandas
            import xgboost

            df = pandas.read_csv(
                data_path,
            )

            if label_column is not None:
                df = df.drop(columns=[df.columns[label_column]])

            testing_data = xgboost.DMatrix(
                data=df,
            )

            model = xgboost.Booster(model_file=model_path)

            predictions = model.predict(testing_data)

            Path(predictions_path).parent.mkdir(parents=True, exist_ok=True)
            numpy.savetxt(predictions_path, predictions)

        import argparse
        _parser = argparse.ArgumentParser(prog='Xgboost predict', description='Make predictions using a trained XGBoost model.\n\n    Args:\n        data_path: Path for the feature data in CSV format.\n        model_path: Path for the trained model in binary XGBoost format.\n        predictions_path: Output path for the predictions.\n        label_column: Column containing the label data.\n\n    Annotations:\n        author: Alexey Volkov <alexey.volkov@ark-kun.com>')
        _parser.add_argument("--data", dest="data_path", type=str, required=True, default=argparse.SUPPRESS)
        _parser.add_argument("--model", dest="model_path", type=str, required=True, default=argparse.SUPPRESS)
        _parser.add_argument("--label-column", dest="label_column", type=int, required=False, default=argparse.SUPPRESS)
        _parser.add_argument("--predictions", dest="predictions_path", type=_make_parent_dirs_and_return_path, required=True, default=argparse.SUPPRESS)
        _parsed_args = vars(_parser.parse_args())

        _outputs = xgboost_predict(**_parsed_args)
    image: python:3.7
|
|
# Scores Apache Parquet feature data with a saved XGBoost model and writes the
# predictions with numpy.savetxt. The label column, when given, is dropped by name.
exec-xgboost-predict-2:
  container:
    args:
      - --data
      - "{{$.inputs.artifacts['data'].path}}"
      - --model
      - "{{$.inputs.artifacts['model'].path}}"
      - --label-column-name
      - "{{$.inputs.parameters['label_column_name']}}"
      - --predictions
      - "{{$.outputs.artifacts['predictions'].path}}"
    command:
      - sh
      - -c
      # Installs xgboost, pandas and pyarrow (retrying with --user if the first
      # attempt fails), then executes the remaining command entries plus `args`.
      - >-
        (PIP_DISABLE_PIP_VERSION_CHECK=1 python3 -m pip install --quiet --no-warn-script-location
        'xgboost==1.1.1' 'pandas==1.0.5' 'pyarrow==0.17.1' || PIP_DISABLE_PIP_VERSION_CHECK=1
        python3 -m pip install --quiet --no-warn-script-location 'xgboost==1.1.1'
        'pandas==1.0.5' 'pyarrow==0.17.1' --user) && "$0" "$@"
      - python3
      - -u
      - -c
      # Inline program. Kept as a literal block so the Python indentation is
      # preserved exactly; the `if` body must stay indented.
      - |
        def _make_parent_dirs_and_return_path(file_path: str):
            import os
            os.makedirs(os.path.dirname(file_path), exist_ok=True)
            return file_path

        def xgboost_predict(
            data_path,
            model_path,
            predictions_path,
            label_column_name = None,
        ):
            '''Make predictions using a trained XGBoost model.

            Args:
                data_path: Path for the feature data in Apache Parquet format.
                model_path: Path for the trained model in binary XGBoost format.
                predictions_path: Output path for the predictions.
                label_column_name: Optional. Name of the column containing the label data that is excluded during the prediction.

            Annotations:
                author: Alexey Volkov <alexey.volkov@ark-kun.com>
            '''
            from pathlib import Path

            import numpy
            import pandas
            import xgboost

            # Loading data
            df = pandas.read_parquet(data_path)
            if label_column_name:
                df = df.drop(columns=[label_column_name])

            evaluation_data = xgboost.DMatrix(
                data=df,
            )

            # Loading the trained model
            model = xgboost.Booster(model_file=model_path)

            predictions = model.predict(evaluation_data)

            Path(predictions_path).parent.mkdir(parents=True, exist_ok=True)
            numpy.savetxt(predictions_path, predictions)

        import argparse
        _parser = argparse.ArgumentParser(prog='Xgboost predict', description='Make predictions using a trained XGBoost model.\n\n    Args:\n        data_path: Path for the feature data in Apache Parquet format.\n        model_path: Path for the trained model in binary XGBoost format.\n        predictions_path: Output path for the predictions.\n        label_column_name: Optional. Name of the column containing the label data that is excluded during the prediction.\n\n    Annotations:\n        author: Alexey Volkov <alexey.volkov@ark-kun.com>')
        _parser.add_argument("--data", dest="data_path", type=str, required=True, default=argparse.SUPPRESS)
        _parser.add_argument("--model", dest="model_path", type=str, required=True, default=argparse.SUPPRESS)
        _parser.add_argument("--label-column-name", dest="label_column_name", type=str, required=False, default=argparse.SUPPRESS)
        _parser.add_argument("--predictions", dest="predictions_path", type=_make_parent_dirs_and_return_path, required=True, default=argparse.SUPPRESS)
        _parsed_args = vars(_parser.parse_args())

        _outputs = xgboost_predict(**_parsed_args)
    image: python:3.7
|
|
# Scores Apache Parquet feature data with a saved XGBoost model and writes the
# predictions with numpy.savetxt. The label column, when given, is dropped by name.
exec-xgboost-predict-3:
  container:
    args:
      - --data
      - "{{$.inputs.artifacts['data'].path}}"
      - --model
      - "{{$.inputs.artifacts['model'].path}}"
      - --label-column-name
      - "{{$.inputs.parameters['label_column_name']}}"
      - --predictions
      - "{{$.outputs.artifacts['predictions'].path}}"
    command:
      - sh
      - -c
      # Installs xgboost, pandas and pyarrow (retrying with --user if the first
      # attempt fails), then executes the remaining command entries plus `args`.
      - >-
        (PIP_DISABLE_PIP_VERSION_CHECK=1 python3 -m pip install --quiet --no-warn-script-location
        'xgboost==1.1.1' 'pandas==1.0.5' 'pyarrow==0.17.1' || PIP_DISABLE_PIP_VERSION_CHECK=1
        python3 -m pip install --quiet --no-warn-script-location 'xgboost==1.1.1'
        'pandas==1.0.5' 'pyarrow==0.17.1' --user) && "$0" "$@"
      - python3
      - -u
      - -c
      # Inline program. Kept as a literal block so the Python indentation is
      # preserved exactly; the `if` body must stay indented.
      - |
        def _make_parent_dirs_and_return_path(file_path: str):
            import os
            os.makedirs(os.path.dirname(file_path), exist_ok=True)
            return file_path

        def xgboost_predict(
            data_path,
            model_path,
            predictions_path,
            label_column_name = None,
        ):
            '''Make predictions using a trained XGBoost model.

            Args:
                data_path: Path for the feature data in Apache Parquet format.
                model_path: Path for the trained model in binary XGBoost format.
                predictions_path: Output path for the predictions.
                label_column_name: Optional. Name of the column containing the label data that is excluded during the prediction.

            Annotations:
                author: Alexey Volkov <alexey.volkov@ark-kun.com>
            '''
            from pathlib import Path

            import numpy
            import pandas
            import xgboost

            # Loading data
            df = pandas.read_parquet(data_path)
            if label_column_name:
                df = df.drop(columns=[label_column_name])

            evaluation_data = xgboost.DMatrix(
                data=df,
            )

            # Loading the trained model
            model = xgboost.Booster(model_file=model_path)

            predictions = model.predict(evaluation_data)

            Path(predictions_path).parent.mkdir(parents=True, exist_ok=True)
            numpy.savetxt(predictions_path, predictions)

        import argparse
        _parser = argparse.ArgumentParser(prog='Xgboost predict', description='Make predictions using a trained XGBoost model.\n\n    Args:\n        data_path: Path for the feature data in Apache Parquet format.\n        model_path: Path for the trained model in binary XGBoost format.\n        predictions_path: Output path for the predictions.\n        label_column_name: Optional. Name of the column containing the label data that is excluded during the prediction.\n\n    Annotations:\n        author: Alexey Volkov <alexey.volkov@ark-kun.com>')
        _parser.add_argument("--data", dest="data_path", type=str, required=True, default=argparse.SUPPRESS)
        _parser.add_argument("--model", dest="model_path", type=str, required=True, default=argparse.SUPPRESS)
        _parser.add_argument("--label-column-name", dest="label_column_name", type=str, required=False, default=argparse.SUPPRESS)
        _parser.add_argument("--predictions", dest="predictions_path", type=_make_parent_dirs_and_return_path, required=True, default=argparse.SUPPRESS)
        _parsed_args = vars(_parser.parse_args())

        _outputs = xgboost_predict(**_parsed_args)
    image: python:3.7
|
|
# Scores CSV feature data with a saved XGBoost model and writes the predictions
# with numpy.savetxt. The label column, when given, is dropped by integer index.
exec-xgboost-predict-4:
  container:
    args:
      - --data
      - "{{$.inputs.artifacts['data'].path}}"
      - --model
      - "{{$.inputs.artifacts['model'].path}}"
      - --label-column
      - "{{$.inputs.parameters['label_column']}}"
      - --predictions
      - "{{$.outputs.artifacts['predictions'].path}}"
    command:
      - sh
      - -c
      # Installs xgboost and pandas (retrying with --user if the first attempt
      # fails), then executes the remaining command entries plus `args`.
      - >-
        (PIP_DISABLE_PIP_VERSION_CHECK=1 python3 -m pip install --quiet --no-warn-script-location
        'xgboost==1.1.1' 'pandas==1.0.5' || PIP_DISABLE_PIP_VERSION_CHECK=1 python3
        -m pip install --quiet --no-warn-script-location 'xgboost==1.1.1' 'pandas==1.0.5'
        --user) && "$0" "$@"
      - python3
      - -u
      - -c
      # Inline program. Kept as a literal block so the Python indentation is
      # preserved exactly; the `if` body must stay indented.
      - |
        def _make_parent_dirs_and_return_path(file_path: str):
            import os
            os.makedirs(os.path.dirname(file_path), exist_ok=True)
            return file_path

        def xgboost_predict(
            data_path,  # Also supports LibSVM
            model_path,
            predictions_path,
            label_column = None,
        ):
            '''Make predictions using a trained XGBoost model.

            Args:
                data_path: Path for the feature data in CSV format.
                model_path: Path for the trained model in binary XGBoost format.
                predictions_path: Output path for the predictions.
                label_column: Column containing the label data.

            Annotations:
                author: Alexey Volkov <alexey.volkov@ark-kun.com>
            '''
            from pathlib import Path

            import numpy
            import pandas
            import xgboost

            df = pandas.read_csv(
                data_path,
            )

            if label_column is not None:
                df = df.drop(columns=[df.columns[label_column]])

            testing_data = xgboost.DMatrix(
                data=df,
            )

            model = xgboost.Booster(model_file=model_path)

            predictions = model.predict(testing_data)

            Path(predictions_path).parent.mkdir(parents=True, exist_ok=True)
            numpy.savetxt(predictions_path, predictions)

        import argparse
        _parser = argparse.ArgumentParser(prog='Xgboost predict', description='Make predictions using a trained XGBoost model.\n\n    Args:\n        data_path: Path for the feature data in CSV format.\n        model_path: Path for the trained model in binary XGBoost format.\n        predictions_path: Output path for the predictions.\n        label_column: Column containing the label data.\n\n    Annotations:\n        author: Alexey Volkov <alexey.volkov@ark-kun.com>')
        _parser.add_argument("--data", dest="data_path", type=str, required=True, default=argparse.SUPPRESS)
        _parser.add_argument("--model", dest="model_path", type=str, required=True, default=argparse.SUPPRESS)
        _parser.add_argument("--label-column", dest="label_column", type=int, required=False, default=argparse.SUPPRESS)
        _parser.add_argument("--predictions", dest="predictions_path", type=_make_parent_dirs_and_return_path, required=True, default=argparse.SUPPRESS)
        _parsed_args = vars(_parser.parse_args())

        _outputs = xgboost_predict(**_parsed_args)
    image: python:3.7
|
|
# Trains an XGBoost model on CSV data (label selected by integer column index)
# and writes both the model file and the Booster's saved configuration.
# Only label_column, num_iterations and objective are forwarded from the
# component inputs; the script's own defaults apply to the other options.
exec-xgboost-train:
  container:
    args:
      - --training-data
      - "{{$.inputs.artifacts['training_data'].path}}"
      - --label-column
      - "{{$.inputs.parameters['label_column']}}"
      - --num-iterations
      - "{{$.inputs.parameters['num_iterations']}}"
      - --objective
      - "{{$.inputs.parameters['objective']}}"
      - --model
      - "{{$.outputs.artifacts['model'].path}}"
      - --model-config
      - "{{$.outputs.artifacts['model_config'].path}}"
    command:
      - sh
      - -c
      # Installs xgboost and pandas (retrying with --user if the first attempt
      # fails), then executes the remaining command entries plus `args`.
      - >-
        (PIP_DISABLE_PIP_VERSION_CHECK=1 python3 -m pip install --quiet --no-warn-script-location
        'xgboost==1.1.1' 'pandas==1.0.5' || PIP_DISABLE_PIP_VERSION_CHECK=1 python3
        -m pip install --quiet --no-warn-script-location 'xgboost==1.1.1' 'pandas==1.0.5'
        --user) && "$0" "$@"
      - python3
      - -u
      - -c
      # Inline program. Kept as a literal block so the Python indentation is
      # preserved exactly; the `if` and `with` bodies must stay indented.
      - |
        def _make_parent_dirs_and_return_path(file_path: str):
            import os
            os.makedirs(os.path.dirname(file_path), exist_ok=True)
            return file_path

        def xgboost_train(
            training_data_path,  # Also supports LibSVM
            model_path,
            model_config_path,
            starting_model_path = None,

            label_column = 0,
            num_iterations = 10,
            booster_params = None,

            # Booster parameters
            objective = 'reg:squarederror',
            booster = 'gbtree',
            learning_rate = 0.3,
            min_split_loss = 0,
            max_depth = 6,
        ):
            '''Train an XGBoost model.

            Args:
                training_data_path: Path for the training data in CSV format.
                model_path: Output path for the trained model in binary XGBoost format.
                model_config_path: Output path for the internal parameter configuration of Booster as a JSON string.
                starting_model_path: Path for the existing trained model to start from.
                label_column: Column containing the label data.
                num_boost_rounds: Number of boosting iterations.
                booster_params: Parameters for the booster. See https://xgboost.readthedocs.io/en/latest/parameter.html
                objective: The learning task and the corresponding learning objective.
                    See https://xgboost.readthedocs.io/en/latest/parameter.html#learning-task-parameters
                    The most common values are:
                    "reg:squarederror" - Regression with squared loss (default).
                    "reg:logistic" - Logistic regression.
                    "binary:logistic" - Logistic regression for binary classification, output probability.
                    "binary:logitraw" - Logistic regression for binary classification, output score before logistic transformation
                    "rank:pairwise" - Use LambdaMART to perform pairwise ranking where the pairwise loss is minimized
                    "rank:ndcg" - Use LambdaMART to perform list-wise ranking where Normalized Discounted Cumulative Gain (NDCG) is maximized

            Annotations:
                author: Alexey Volkov <alexey.volkov@ark-kun.com>
            '''
            import pandas
            import xgboost

            df = pandas.read_csv(
                training_data_path,
            )

            training_data = xgboost.DMatrix(
                data=df.drop(columns=[df.columns[label_column]]),
                label=df[df.columns[label_column]],
            )

            booster_params = booster_params or {}
            booster_params.setdefault('objective', objective)
            booster_params.setdefault('booster', booster)
            booster_params.setdefault('learning_rate', learning_rate)
            booster_params.setdefault('min_split_loss', min_split_loss)
            booster_params.setdefault('max_depth', max_depth)

            starting_model = None
            if starting_model_path:
                starting_model = xgboost.Booster(model_file=starting_model_path)

            model = xgboost.train(
                params=booster_params,
                dtrain=training_data,
                num_boost_round=num_iterations,
                xgb_model=starting_model
            )

            # Saving the model in binary format
            model.save_model(model_path)

            model_config_str = model.save_config()
            with open(model_config_path, 'w') as model_config_file:
                model_config_file.write(model_config_str)

        import json
        import argparse
        _parser = argparse.ArgumentParser(prog='Xgboost train', description='Train an XGBoost model.\n\n    Args:\n        training_data_path: Path for the training data in CSV format.\n        model_path: Output path for the trained model in binary XGBoost format.\n        model_config_path: Output path for the internal parameter configuration of Booster as a JSON string.\n        starting_model_path: Path for the existing trained model to start from.\n        label_column: Column containing the label data.\n        num_boost_rounds: Number of boosting iterations.\n        booster_params: Parameters for the booster. See https://xgboost.readthedocs.io/en/latest/parameter.html\n        objective: The learning task and the corresponding learning objective.\n            See https://xgboost.readthedocs.io/en/latest/parameter.html#learning-task-parameters\n            The most common values are:\n            "reg:squarederror" - Regression with squared loss (default).\n            "reg:logistic" - Logistic regression.\n            "binary:logistic" - Logistic regression for binary classification, output probability.\n            "binary:logitraw" - Logistic regression for binary classification, output score before logistic transformation\n            "rank:pairwise" - Use LambdaMART to perform pairwise ranking where the pairwise loss is minimized\n            "rank:ndcg" - Use LambdaMART to perform list-wise ranking where Normalized Discounted Cumulative Gain (NDCG) is maximized\n\n    Annotations:\n        author: Alexey Volkov <alexey.volkov@ark-kun.com>')
        _parser.add_argument("--training-data", dest="training_data_path", type=str, required=True, default=argparse.SUPPRESS)
        _parser.add_argument("--starting-model", dest="starting_model_path", type=str, required=False, default=argparse.SUPPRESS)
        _parser.add_argument("--label-column", dest="label_column", type=int, required=False, default=argparse.SUPPRESS)
        _parser.add_argument("--num-iterations", dest="num_iterations", type=int, required=False, default=argparse.SUPPRESS)
        _parser.add_argument("--booster-params", dest="booster_params", type=json.loads, required=False, default=argparse.SUPPRESS)
        _parser.add_argument("--objective", dest="objective", type=str, required=False, default=argparse.SUPPRESS)
        _parser.add_argument("--booster", dest="booster", type=str, required=False, default=argparse.SUPPRESS)
        _parser.add_argument("--learning-rate", dest="learning_rate", type=float, required=False, default=argparse.SUPPRESS)
        _parser.add_argument("--min-split-loss", dest="min_split_loss", type=float, required=False, default=argparse.SUPPRESS)
        _parser.add_argument("--max-depth", dest="max_depth", type=int, required=False, default=argparse.SUPPRESS)
        _parser.add_argument("--model", dest="model_path", type=_make_parent_dirs_and_return_path, required=True, default=argparse.SUPPRESS)
        _parser.add_argument("--model-config", dest="model_config_path", type=_make_parent_dirs_and_return_path, required=True, default=argparse.SUPPRESS)
        _parsed_args = vars(_parser.parse_args())

        _outputs = xgboost_train(**_parsed_args)
    image: python:3.7
|
|
exec-xgboost-train-2:
|
|
container:
|
|
args:
|
|
- --training-data
|
|
- '{{$.inputs.artifacts[''training_data''].path}}'
|
|
- --label-column-name
|
|
- '{{$.inputs.parameters[''label_column_name'']}}'
|
|
- --num-iterations
|
|
- '{{$.inputs.parameters[''num_iterations'']}}'
|
|
- --objective
|
|
- '{{$.inputs.parameters[''objective'']}}'
|
|
- --model
|
|
- '{{$.outputs.artifacts[''model''].path}}'
|
|
- --model-config
|
|
- '{{$.outputs.artifacts[''model_config''].path}}'
|
|
command:
|
|
- sh
|
|
- -c
|
|
- (PIP_DISABLE_PIP_VERSION_CHECK=1 python3 -m pip install --quiet --no-warn-script-location
|
|
'xgboost==1.1.1' 'pandas==1.0.5' 'pyarrow==0.17.1' || PIP_DISABLE_PIP_VERSION_CHECK=1
|
|
python3 -m pip install --quiet --no-warn-script-location 'xgboost==1.1.1'
|
|
'pandas==1.0.5' 'pyarrow==0.17.1' --user) && "$0" "$@"
|
|
- python3
|
|
- -u
|
|
- -c
|
|
- "def _make_parent_dirs_and_return_path(file_path: str):\n import os\n\
|
|
\ os.makedirs(os.path.dirname(file_path), exist_ok=True)\n return\
|
|
\ file_path\n\ndef xgboost_train(\n training_data_path,\n model_path,\n\
|
|
\ model_config_path,\n label_column_name,\n\n starting_model_path\
|
|
\ = None,\n\n num_iterations = 10,\n booster_params = None,\n\n \
|
|
\ # Booster parameters\n objective = 'reg:squarederror',\n booster\
|
|
\ = 'gbtree',\n learning_rate = 0.3,\n min_split_loss = 0,\n max_depth\
|
|
\ = 6,\n):\n '''Train an XGBoost model.\n\n Args:\n training_data_path:\
|
|
\ Path for the training data in Apache Parquet format.\n model_path:\
|
|
\ Output path for the trained model in binary XGBoost format.\n model_config_path:\
|
|
\ Output path for the internal parameter configuration of Booster as a JSON\
|
|
\ string.\n starting_model_path: Path for the existing trained model\
|
|
\ to start from.\n label_column_name: Name of the column containing\
|
|
\ the label data.\n num_boost_rounds: Number of boosting iterations.\n\
|
|
\ booster_params: Parameters for the booster. See https://xgboost.readthedocs.io/en/latest/parameter.html\n\
|
|
\ objective: The learning task and the corresponding learning objective.\n\
|
|
\ See https://xgboost.readthedocs.io/en/latest/parameter.html#learning-task-parameters\n\
|
|
\ The most common values are:\n \"reg:squarederror\"\
|
|
\ - Regression with squared loss (default).\n \"reg:logistic\"\
|
|
\ - Logistic regression.\n \"binary:logistic\" - Logistic regression\
|
|
\ for binary classification, output probability.\n \"binary:logitraw\"\
|
|
\ - Logistic regression for binary classification, output score before logistic\
|
|
\ transformation\n \"rank:pairwise\" - Use LambdaMART to perform\
|
|
\ pairwise ranking where the pairwise loss is minimized\n \"\
|
|
rank:ndcg\" - Use LambdaMART to perform list-wise ranking where Normalized\
|
|
\ Discounted Cumulative Gain (NDCG) is maximized\n\n Annotations:\n \
|
|
\ author: Alexey Volkov <alexey.volkov@ark-kun.com>\n '''\n \
|
|
\ import pandas\n import xgboost\n\n # Loading data\n df = pandas.read_parquet(training_data_path)\n\
|
|
\ training_data = xgboost.DMatrix(\n data=df.drop(columns=[label_column_name]),\n\
|
|
\ label=df[[label_column_name]],\n )\n # Training\n booster_params\
|
|
\ = booster_params or {}\n booster_params.setdefault('objective', objective)\n\
|
|
\ booster_params.setdefault('booster', booster)\n booster_params.setdefault('learning_rate',\
|
|
\ learning_rate)\n booster_params.setdefault('min_split_loss', min_split_loss)\n\
|
|
\ booster_params.setdefault('max_depth', max_depth)\n\n starting_model\
|
|
\ = None\n if starting_model_path:\n starting_model = xgboost.Booster(model_file=starting_model_path)\n\
|
|
\n model = xgboost.train(\n params=booster_params,\n dtrain=training_data,\n\
|
|
\ num_boost_round=num_iterations,\n xgb_model=starting_model\n\
|
|
\ )\n\n # Saving the model in binary format\n model.save_model(model_path)\n\
|
|
\n model_config_str = model.save_config()\n with open(model_config_path,\
|
|
\ 'w') as model_config_file:\n model_config_file.write(model_config_str)\n\
|
|
\nimport json\nimport argparse\n_parser = argparse.ArgumentParser(prog='Xgboost\
|
|
\ train', description='Train an XGBoost model.\\n\\n Args:\\n \
|
|
\ training_data_path: Path for the training data in Apache Parquet format.\\\
|
|
n model_path: Output path for the trained model in binary XGBoost\
|
|
\ format.\\n model_config_path: Output path for the internal parameter\
|
|
\ configuration of Booster as a JSON string.\\n starting_model_path:\
|
|
\ Path for the existing trained model to start from.\\n label_column_name:\
|
|
\ Name of the column containing the label data.\\n num_boost_rounds:\
|
|
\ Number of boosting iterations.\\n booster_params: Parameters for\
|
|
\ the booster. See https://xgboost.readthedocs.io/en/latest/parameter.html\\\
|
|
n objective: The learning task and the corresponding learning objective.\\\
|
|
n See https://xgboost.readthedocs.io/en/latest/parameter.html#learning-task-parameters\\\
|
|
n The most common values are:\\n \"reg:squarederror\"\
|
|
\ - Regression with squared loss (default).\\n \"reg:logistic\"\
|
|
\ - Logistic regression.\\n \"binary:logistic\" - Logistic regression\
|
|
\ for binary classification, output probability.\\n \"binary:logitraw\"\
|
|
\ - Logistic regression for binary classification, output score before logistic\
|
|
\ transformation\\n \"rank:pairwise\" - Use LambdaMART to perform\
|
|
\ pairwise ranking where the pairwise loss is minimized\\n \"\
|
|
rank:ndcg\" - Use LambdaMART to perform list-wise ranking where Normalized\
|
|
\ Discounted Cumulative Gain (NDCG) is maximized\\n\\n Annotations:\\\
|
|
n author: Alexey Volkov <alexey.volkov@ark-kun.com>')\n_parser.add_argument(\"\
|
|
--training-data\", dest=\"training_data_path\", type=str, required=True,\
|
|
\ default=argparse.SUPPRESS)\n_parser.add_argument(\"--label-column-name\"\
|
|
, dest=\"label_column_name\", type=str, required=True, default=argparse.SUPPRESS)\n\
|
|
_parser.add_argument(\"--starting-model\", dest=\"starting_model_path\"\
|
|
, type=str, required=False, default=argparse.SUPPRESS)\n_parser.add_argument(\"\
|
|
--num-iterations\", dest=\"num_iterations\", type=int, required=False, default=argparse.SUPPRESS)\n\
|
|
_parser.add_argument(\"--booster-params\", dest=\"booster_params\", type=json.loads,\
|
|
\ required=False, default=argparse.SUPPRESS)\n_parser.add_argument(\"--objective\"\
|
|
, dest=\"objective\", type=str, required=False, default=argparse.SUPPRESS)\n\
|
|
_parser.add_argument(\"--booster\", dest=\"booster\", type=str, required=False,\
|
|
\ default=argparse.SUPPRESS)\n_parser.add_argument(\"--learning-rate\",\
|
|
\ dest=\"learning_rate\", type=float, required=False, default=argparse.SUPPRESS)\n\
|
|
_parser.add_argument(\"--min-split-loss\", dest=\"min_split_loss\", type=float,\
|
|
\ required=False, default=argparse.SUPPRESS)\n_parser.add_argument(\"--max-depth\"\
|
|
, dest=\"max_depth\", type=int, required=False, default=argparse.SUPPRESS)\n\
|
|
_parser.add_argument(\"--model\", dest=\"model_path\", type=_make_parent_dirs_and_return_path,\
|
|
\ required=True, default=argparse.SUPPRESS)\n_parser.add_argument(\"--model-config\"\
|
|
, dest=\"model_config_path\", type=_make_parent_dirs_and_return_path, required=True,\
|
|
\ default=argparse.SUPPRESS)\n_parsed_args = vars(_parser.parse_args())\n\
|
|
\n_outputs = xgboost_train(**_parsed_args)\n"
|
|
image: python:3.7
|
|
# Name of the pipeline described by this spec.
pipelineInfo:
  name: xgboost-sample-pipeline
# Root DAG of the pipeline. One dataset query feeds two training paths:
#   * xgboost-train    consumes the dataset task's `table` artifact directly
#   * xgboost-train-2  consumes the Parquet conversion of that artifact
# Each trained model is then scored against both data representations
# (xgboost-predict, -2, -3, -4).
#
# NOTE(review): numeric constants below (10000.0, 0.0, 200.0) are written as
# floats; the component definitions visible at the top of the file declare
# `limit` and `label_column` as NUMBER_INTEGER. Values are kept as found;
# confirm the backend coerces them before changing.
root:
  dag:
    tasks:
      # Source query: selects `tips` plus eight other columns for trips that
      # started in January 2019.
      chicago-taxi-trips-dataset:
        cachingOptions:
          enableCache: true
        componentRef:
          name: comp-chicago-taxi-trips-dataset
        inputs:
          parameters:
            limit:
              runtimeValue:
                constant: 10000.0
            select:
              runtimeValue:
                # `tips` is listed first, i.e. it is column 0 of the result.
                constant: tips,trip_seconds,trip_miles,pickup_community_area,dropoff_community_area,fare,tolls,extras,trip_total
            where:
              runtimeValue:
                # Single-quoted so the embedded double quotes stay literal.
                constant: 'trip_start_timestamp >= "2019-01-01" AND trip_start_timestamp < "2019-02-01"'
        taskInfo:
          name: chicago-taxi-trips-dataset
      # Converts the dataset task's `table` artifact into `output_data`.
      convert-csv-to-apache-parquet:
        cachingOptions:
          enableCache: true
        componentRef:
          name: comp-convert-csv-to-apache-parquet
        dependentTasks:
        - chicago-taxi-trips-dataset
        inputs:
          artifacts:
            data:
              taskOutputArtifact:
                outputArtifactKey: table
                producerTask: chicago-taxi-trips-dataset
        taskInfo:
          name: convert-csv-to-apache-parquet
      # Scores the dataset `table` with the model produced by xgboost-train.
      xgboost-predict:
        cachingOptions:
          enableCache: true
        componentRef:
          name: comp-xgboost-predict
        dependentTasks:
        - chicago-taxi-trips-dataset
        - xgboost-train
        inputs:
          artifacts:
            data:
              taskOutputArtifact:
                outputArtifactKey: table
                producerTask: chicago-taxi-trips-dataset
            model:
              taskOutputArtifact:
                outputArtifactKey: model
                producerTask: xgboost-train
          parameters:
            # Presumably the positional index of the label column (`tips`).
            label_column:
              runtimeValue:
                constant: 0.0
        taskInfo:
          name: xgboost-predict
      # Scores the Parquet `output_data` with the model from xgboost-train-2.
      xgboost-predict-2:
        cachingOptions:
          enableCache: true
        componentRef:
          name: comp-xgboost-predict-2
        dependentTasks:
        - convert-csv-to-apache-parquet
        - xgboost-train-2
        inputs:
          artifacts:
            data:
              taskOutputArtifact:
                outputArtifactKey: output_data
                producerTask: convert-csv-to-apache-parquet
            model:
              taskOutputArtifact:
                outputArtifactKey: model
                producerTask: xgboost-train-2
          parameters:
            label_column_name:
              runtimeValue:
                constant: tips
        taskInfo:
          name: xgboost-predict-2
      # Cross-format check: Parquet `output_data` scored with the model from
      # xgboost-train (which was trained on the dataset `table`).
      xgboost-predict-3:
        cachingOptions:
          enableCache: true
        componentRef:
          name: comp-xgboost-predict-3
        dependentTasks:
        - convert-csv-to-apache-parquet
        - xgboost-train
        inputs:
          artifacts:
            data:
              taskOutputArtifact:
                outputArtifactKey: output_data
                producerTask: convert-csv-to-apache-parquet
            model:
              taskOutputArtifact:
                outputArtifactKey: model
                producerTask: xgboost-train
          parameters:
            label_column_name:
              runtimeValue:
                constant: tips
        taskInfo:
          name: xgboost-predict-3
      # Cross-format check: dataset `table` scored with the model from
      # xgboost-train-2 (which was trained on the Parquet `output_data`).
      xgboost-predict-4:
        cachingOptions:
          enableCache: true
        componentRef:
          name: comp-xgboost-predict-4
        dependentTasks:
        - chicago-taxi-trips-dataset
        - xgboost-train-2
        inputs:
          artifacts:
            data:
              taskOutputArtifact:
                outputArtifactKey: table
                producerTask: chicago-taxi-trips-dataset
            model:
              taskOutputArtifact:
                outputArtifactKey: model
                producerTask: xgboost-train-2
          parameters:
            # Presumably the positional index of the label column (`tips`).
            label_column:
              runtimeValue:
                constant: 0.0
        taskInfo:
          name: xgboost-predict-4
      # Trains on the dataset `table`; label given by `label_column`.
      xgboost-train:
        cachingOptions:
          enableCache: true
        componentRef:
          name: comp-xgboost-train
        dependentTasks:
        - chicago-taxi-trips-dataset
        inputs:
          artifacts:
            training_data:
              taskOutputArtifact:
                outputArtifactKey: table
                producerTask: chicago-taxi-trips-dataset
          parameters:
            label_column:
              runtimeValue:
                constant: 0.0
            num_iterations:
              runtimeValue:
                constant: 200.0
            objective:
              runtimeValue:
                constant: reg:squarederror
        taskInfo:
          name: xgboost-train
      # Trains on the Parquet `output_data`; label column selected by name.
      xgboost-train-2:
        cachingOptions:
          enableCache: true
        componentRef:
          name: comp-xgboost-train-2
        dependentTasks:
        - convert-csv-to-apache-parquet
        inputs:
          artifacts:
            training_data:
              taskOutputArtifact:
                outputArtifactKey: output_data
                producerTask: convert-csv-to-apache-parquet
          parameters:
            label_column_name:
              runtimeValue:
                constant: tips
            num_iterations:
              runtimeValue:
                constant: 200.0
            objective:
              runtimeValue:
                constant: reg:squarederror
        taskInfo:
          name: xgboost-train-2
# Version of the spec schema and the SDK build recorded in this file.
# The schema version is quoted so it is always read as a string.
schemaVersion: '2.1.0'
sdkVersion: kfp-2.0.0-alpha.1