chore(components): clean up component directory (#7183)
* move components to contrib
This commit is contained in:
parent 4d9e9bedd3
commit 694d3a3764
@@ -1,62 +0,0 @@
from kfp.components import InputPath, OutputPath, create_component_from_func


def catboost_predict_class_probabilities(
    data_path: InputPath('CSV'),
    model_path: InputPath('CatBoostModel'),
    predictions_path: OutputPath(),

    label_column: int = None,
):
    '''Predict class probabilities with a CatBoost model.

    Args:
        data_path: Path for the data in CSV format.
        model_path: Path for the trained model in binary CatBoostModel format.
        label_column: Column containing the label data.
        predictions_path: Output path for the predictions.

    Outputs:
        predictions: Predictions in text format.

    Annotations:
        author: Alexey Volkov <alexey.volkov@ark-kun.com>
    '''
    import tempfile

    from catboost import CatBoost, Pool
    import numpy

    if label_column:
        column_descriptions = {label_column: 'Label'}
        column_description_path = tempfile.NamedTemporaryFile(delete=False).name
        with open(column_description_path, 'w') as column_description_file:
            for idx, kind in column_descriptions.items():
                column_description_file.write('{}\t{}\n'.format(idx, kind))
    else:
        column_description_path = None

    eval_data = Pool(
        data_path,
        column_description=column_description_path,
        has_header=True,
        delimiter=',',
    )

    model = CatBoost()
    model.load_model(model_path)

    predictions = model.predict(eval_data, prediction_type='Probability')
    numpy.savetxt(predictions_path, predictions)


if __name__ == '__main__':
    catboost_predict_class_probabilities_op = create_component_from_func(
        catboost_predict_class_probabilities,
        output_component_file='component.yaml',
        base_image='python:3.7',
        packages_to_install=['catboost==0.23'],
        annotations={
            "author": "Alexey Volkov <alexey.volkov@ark-kun.com>",
            "canonical_location": "https://raw.githubusercontent.com/Ark-kun/pipeline_components/master/components/CatBoost/Predict_class_probabilities/from_CSV/component.yaml",
        },
    )
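Since the KFP type annotations are only metadata, the function above can also be called directly for a quick local smoke test. A minimal sketch, assuming catboost and numpy are installed locally; all paths are hypothetical:

    # Hypothetical paths: data.csv needs a header row; model.cbm is a trained CatBoost model.
    catboost_predict_class_probabilities(
        data_path='data.csv',
        model_path='model.cbm',
        predictions_path='predictions.txt',
    )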
@@ -1,112 +0,0 @@
name: Catboost predict class probabilities
description: |-
  Predict class probabilities with a CatBoost model.

  Args:
      data_path: Path for the data in CSV format.
      model_path: Path for the trained model in binary CatBoostModel format.
      label_column: Column containing the label data.
      predictions_path: Output path for the predictions.

  Outputs:
      predictions: Predictions in text format.

  Annotations:
      author: Alexey Volkov <alexey.volkov@ark-kun.com>
inputs:
- {name: data, type: CSV}
- {name: model, type: CatBoostModel}
- {name: label_column, type: Integer, optional: true}
outputs:
- {name: predictions}
metadata:
  annotations:
    author: Alexey Volkov <alexey.volkov@ark-kun.com>
    canonical_location: 'https://raw.githubusercontent.com/Ark-kun/pipeline_components/master/components/CatBoost/Predict_class_probabilities/from_CSV/component.yaml'
implementation:
  container:
    image: python:3.7
    command:
    - sh
    - -c
    - (PIP_DISABLE_PIP_VERSION_CHECK=1 python3 -m pip install --quiet --no-warn-script-location
      'catboost==0.23' || PIP_DISABLE_PIP_VERSION_CHECK=1 python3 -m pip install --quiet
      --no-warn-script-location 'catboost==0.23' --user) && "$0" "$@"
    - python3
    - -u
    - -c
    - |
      def _make_parent_dirs_and_return_path(file_path: str):
          import os
          os.makedirs(os.path.dirname(file_path), exist_ok=True)
          return file_path

      def catboost_predict_class_probabilities(
          data_path,
          model_path,
          predictions_path,

          label_column = None,
      ):
          '''Predict class probabilities with a CatBoost model.

          Args:
              data_path: Path for the data in CSV format.
              model_path: Path for the trained model in binary CatBoostModel format.
              label_column: Column containing the label data.
              predictions_path: Output path for the predictions.

          Outputs:
              predictions: Predictions in text format.

          Annotations:
              author: Alexey Volkov <alexey.volkov@ark-kun.com>
          '''
          import tempfile

          from catboost import CatBoost, Pool
          import numpy

          if label_column:
              column_descriptions = {label_column: 'Label'}
              column_description_path = tempfile.NamedTemporaryFile(delete=False).name
              with open(column_description_path, 'w') as column_description_file:
                  for idx, kind in column_descriptions.items():
                      column_description_file.write('{}\t{}\n'.format(idx, kind))
          else:
              column_description_path = None

          eval_data = Pool(
              data_path,
              column_description=column_description_path,
              has_header=True,
              delimiter=',',
          )

          model = CatBoost()
          model.load_model(model_path)

          predictions = model.predict(eval_data, prediction_type='Probability')
          numpy.savetxt(predictions_path, predictions)

      import argparse
      _parser = argparse.ArgumentParser(prog='Catboost predict class probabilities', description='Predict class probabilities with a CatBoost model.\n\n    Args:\n        data_path: Path for the data in CSV format.\n        model_path: Path for the trained model in binary CatBoostModel format.\n        label_column: Column containing the label data.\n        predictions_path: Output path for the predictions.\n\n    Outputs:\n        predictions: Predictions in text format.\n\n    Annotations:\n        author: Alexey Volkov <alexey.volkov@ark-kun.com>')
      _parser.add_argument("--data", dest="data_path", type=str, required=True, default=argparse.SUPPRESS)
      _parser.add_argument("--model", dest="model_path", type=str, required=True, default=argparse.SUPPRESS)
      _parser.add_argument("--label-column", dest="label_column", type=int, required=False, default=argparse.SUPPRESS)
      _parser.add_argument("--predictions", dest="predictions_path", type=_make_parent_dirs_and_return_path, required=True, default=argparse.SUPPRESS)
      _parsed_args = vars(_parser.parse_args())

      _outputs = catboost_predict_class_probabilities(**_parsed_args)
    args:
    - --data
    - {inputPath: data}
    - --model
    - {inputPath: model}
    - if:
        cond: {isPresent: label_column}
        then:
        - --label-column
        - {inputValue: label_column}
    - --predictions
    - {outputPath: predictions}
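The YAML above is the serialized form that create_component_from_func writes out; it can be loaded back and wired into a pipeline. A sketch using the KFP v1 SDK (the local file path is hypothetical):

    from kfp import components

    # Hypothetical local path to the YAML above.
    predict_class_probabilities_op = components.load_component_from_file('component.yaml')

    # Inside a pipeline function, wire it to upstream CSV/CatBoostModel outputs:
    #     predict_class_probabilities_op(
    #         data=transform_task.outputs['transformed_table'],
    #         model=train_task.outputs['model'],
    #         label_column=0,
    #     )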
@@ -1,62 +0,0 @@
from kfp.components import InputPath, OutputPath, create_component_from_func


def catboost_predict_classes(
    data_path: InputPath('CSV'),
    model_path: InputPath('CatBoostModel'),
    predictions_path: OutputPath(),

    label_column: int = None,
):
    '''Predict classes using the CatBoost classifier model.

    Args:
        data_path: Path for the data in CSV format.
        model_path: Path for the trained model in binary CatBoostModel format.
        label_column: Column containing the label data.
        predictions_path: Output path for the predictions.

    Outputs:
        predictions: Class predictions in text format.

    Annotations:
        author: Alexey Volkov <alexey.volkov@ark-kun.com>
    '''
    import tempfile

    from catboost import CatBoostClassifier, Pool
    import numpy

    if label_column:
        column_descriptions = {label_column: 'Label'}
        column_description_path = tempfile.NamedTemporaryFile(delete=False).name
        with open(column_description_path, 'w') as column_description_file:
            for idx, kind in column_descriptions.items():
                column_description_file.write('{}\t{}\n'.format(idx, kind))
    else:
        column_description_path = None

    eval_data = Pool(
        data_path,
        column_description=column_description_path,
        has_header=True,
        delimiter=',',
    )

    model = CatBoostClassifier()
    model.load_model(model_path)

    predictions = model.predict(eval_data)
    numpy.savetxt(predictions_path, predictions, fmt='%s')


if __name__ == '__main__':
    catboost_predict_classes_op = create_component_from_func(
        catboost_predict_classes,
        output_component_file='component.yaml',
        base_image='python:3.7',
        packages_to_install=['catboost==0.22'],
        annotations={
            "author": "Alexey Volkov <alexey.volkov@ark-kun.com>",
            "canonical_location": "https://raw.githubusercontent.com/Ark-kun/pipeline_components/master/components/CatBoost/Predict_classes/from_CSV/component.yaml",
        },
    )
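The column-description file written above is CatBoost's tab-separated "column index, tab, column type" listing. For example, with label_column=3 the temporary file would contain the single line:

    3	Label

Note that the code guards this with `if label_column:`, so column 0 is treated the same as passing no label column at all.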
@@ -1,112 +0,0 @@
name: Catboost predict classes
description: |-
  Predict classes using the CatBoost classifier model.

  Args:
      data_path: Path for the data in CSV format.
      model_path: Path for the trained model in binary CatBoostModel format.
      label_column: Column containing the label data.
      predictions_path: Output path for the predictions.

  Outputs:
      predictions: Class predictions in text format.

  Annotations:
      author: Alexey Volkov <alexey.volkov@ark-kun.com>
inputs:
- {name: data, type: CSV}
- {name: model, type: CatBoostModel}
- {name: label_column, type: Integer, optional: true}
outputs:
- {name: predictions}
metadata:
  annotations:
    author: Alexey Volkov <alexey.volkov@ark-kun.com>
    canonical_location: 'https://raw.githubusercontent.com/Ark-kun/pipeline_components/master/components/CatBoost/Predict_classes/from_CSV/component.yaml'
implementation:
  container:
    image: python:3.7
    command:
    - sh
    - -c
    - (PIP_DISABLE_PIP_VERSION_CHECK=1 python3 -m pip install --quiet --no-warn-script-location
      'catboost==0.22' || PIP_DISABLE_PIP_VERSION_CHECK=1 python3 -m pip install --quiet
      --no-warn-script-location 'catboost==0.22' --user) && "$0" "$@"
    - python3
    - -u
    - -c
    - |
      def _make_parent_dirs_and_return_path(file_path: str):
          import os
          os.makedirs(os.path.dirname(file_path), exist_ok=True)
          return file_path

      def catboost_predict_classes(
          data_path,
          model_path,
          predictions_path,

          label_column = None,
      ):
          '''Predict classes using the CatBoost classifier model.

          Args:
              data_path: Path for the data in CSV format.
              model_path: Path for the trained model in binary CatBoostModel format.
              label_column: Column containing the label data.
              predictions_path: Output path for the predictions.

          Outputs:
              predictions: Class predictions in text format.

          Annotations:
              author: Alexey Volkov <alexey.volkov@ark-kun.com>
          '''
          import tempfile

          from catboost import CatBoostClassifier, Pool
          import numpy

          if label_column:
              column_descriptions = {label_column: 'Label'}
              column_description_path = tempfile.NamedTemporaryFile(delete=False).name
              with open(column_description_path, 'w') as column_description_file:
                  for idx, kind in column_descriptions.items():
                      column_description_file.write('{}\t{}\n'.format(idx, kind))
          else:
              column_description_path = None

          eval_data = Pool(
              data_path,
              column_description=column_description_path,
              has_header=True,
              delimiter=',',
          )

          model = CatBoostClassifier()
          model.load_model(model_path)

          predictions = model.predict(eval_data)
          numpy.savetxt(predictions_path, predictions, fmt='%s')

      import argparse
      _parser = argparse.ArgumentParser(prog='Catboost predict classes', description='Predict classes using the CatBoost classifier model.\n\n    Args:\n        data_path: Path for the data in CSV format.\n        model_path: Path for the trained model in binary CatBoostModel format.\n        label_column: Column containing the label data.\n        predictions_path: Output path for the predictions.\n\n    Outputs:\n        predictions: Class predictions in text format.\n\n    Annotations:\n        author: Alexey Volkov <alexey.volkov@ark-kun.com>')
      _parser.add_argument("--data", dest="data_path", type=str, required=True, default=argparse.SUPPRESS)
      _parser.add_argument("--model", dest="model_path", type=str, required=True, default=argparse.SUPPRESS)
      _parser.add_argument("--label-column", dest="label_column", type=int, required=False, default=argparse.SUPPRESS)
      _parser.add_argument("--predictions", dest="predictions_path", type=_make_parent_dirs_and_return_path, required=True, default=argparse.SUPPRESS)
      _parsed_args = vars(_parser.parse_args())

      _outputs = catboost_predict_classes(**_parsed_args)
    args:
    - --data
    - {inputPath: data}
    - --model
    - {inputPath: model}
    - if:
        cond: {isPresent: label_column}
        then:
        - --label-column
        - {inputValue: label_column}
    - --predictions
    - {outputPath: predictions}
@@ -1,62 +0,0 @@
from kfp.components import InputPath, OutputPath, create_component_from_func


def catboost_predict_values(
    data_path: InputPath('CSV'),
    model_path: InputPath('CatBoostModel'),
    predictions_path: OutputPath(),

    label_column: int = None,
):
    '''Predict values with a CatBoost model.

    Args:
        data_path: Path for the data in CSV format.
        model_path: Path for the trained model in binary CatBoostModel format.
        label_column: Column containing the label data.
        predictions_path: Output path for the predictions.

    Outputs:
        predictions: Predictions in text format.

    Annotations:
        author: Alexey Volkov <alexey.volkov@ark-kun.com>
    '''
    import tempfile

    from catboost import CatBoost, Pool
    import numpy

    if label_column:
        column_descriptions = {label_column: 'Label'}
        column_description_path = tempfile.NamedTemporaryFile(delete=False).name
        with open(column_description_path, 'w') as column_description_file:
            for idx, kind in column_descriptions.items():
                column_description_file.write('{}\t{}\n'.format(idx, kind))
    else:
        column_description_path = None

    eval_data = Pool(
        data_path,
        column_description=column_description_path,
        has_header=True,
        delimiter=',',
    )

    model = CatBoost()
    model.load_model(model_path)

    predictions = model.predict(eval_data, prediction_type='RawFormulaVal')
    numpy.savetxt(predictions_path, predictions)


if __name__ == '__main__':
    catboost_predict_values_op = create_component_from_func(
        catboost_predict_values,
        output_component_file='component.yaml',
        base_image='python:3.7',
        packages_to_install=['catboost==0.23'],
        annotations={
            "author": "Alexey Volkov <alexey.volkov@ark-kun.com>",
            "canonical_location": "https://raw.githubusercontent.com/Ark-kun/pipeline_components/master/components/CatBoost/Predict_values/from_CSV/component.yaml",
        },
    )
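The three predict components share the same data-loading code and differ only in the CatBoost prediction call. A sketch of the underlying API, assuming a trained CatBoostClassifier named `model` and a `Pool` named `eval_data`:

    values = model.predict(eval_data, prediction_type='RawFormulaVal')  # raw formula values ("Predict values")
    probabilities = model.predict(eval_data, prediction_type='Probability')  # per-class probabilities ("Predict class probabilities")
    classes = model.predict(eval_data)  # class labels; CatBoostClassifier defaults to prediction_type='Class' ("Predict classes")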
@@ -1,112 +0,0 @@
name: Catboost predict values
description: |-
  Predict values with a CatBoost model.

  Args:
      data_path: Path for the data in CSV format.
      model_path: Path for the trained model in binary CatBoostModel format.
      label_column: Column containing the label data.
      predictions_path: Output path for the predictions.

  Outputs:
      predictions: Predictions in text format.

  Annotations:
      author: Alexey Volkov <alexey.volkov@ark-kun.com>
inputs:
- {name: data, type: CSV}
- {name: model, type: CatBoostModel}
- {name: label_column, type: Integer, optional: true}
outputs:
- {name: predictions}
metadata:
  annotations:
    author: Alexey Volkov <alexey.volkov@ark-kun.com>
    canonical_location: 'https://raw.githubusercontent.com/Ark-kun/pipeline_components/master/components/CatBoost/Predict_values/from_CSV/component.yaml'
implementation:
  container:
    image: python:3.7
    command:
    - sh
    - -c
    - (PIP_DISABLE_PIP_VERSION_CHECK=1 python3 -m pip install --quiet --no-warn-script-location
      'catboost==0.23' || PIP_DISABLE_PIP_VERSION_CHECK=1 python3 -m pip install --quiet
      --no-warn-script-location 'catboost==0.23' --user) && "$0" "$@"
    - python3
    - -u
    - -c
    - |
      def _make_parent_dirs_and_return_path(file_path: str):
          import os
          os.makedirs(os.path.dirname(file_path), exist_ok=True)
          return file_path

      def catboost_predict_values(
          data_path,
          model_path,
          predictions_path,

          label_column = None,
      ):
          '''Predict values with a CatBoost model.

          Args:
              data_path: Path for the data in CSV format.
              model_path: Path for the trained model in binary CatBoostModel format.
              label_column: Column containing the label data.
              predictions_path: Output path for the predictions.

          Outputs:
              predictions: Predictions in text format.

          Annotations:
              author: Alexey Volkov <alexey.volkov@ark-kun.com>
          '''
          import tempfile

          from catboost import CatBoost, Pool
          import numpy

          if label_column:
              column_descriptions = {label_column: 'Label'}
              column_description_path = tempfile.NamedTemporaryFile(delete=False).name
              with open(column_description_path, 'w') as column_description_file:
                  for idx, kind in column_descriptions.items():
                      column_description_file.write('{}\t{}\n'.format(idx, kind))
          else:
              column_description_path = None

          eval_data = Pool(
              data_path,
              column_description=column_description_path,
              has_header=True,
              delimiter=',',
          )

          model = CatBoost()
          model.load_model(model_path)

          predictions = model.predict(eval_data, prediction_type='RawFormulaVal')
          numpy.savetxt(predictions_path, predictions)

      import argparse
      _parser = argparse.ArgumentParser(prog='Catboost predict values', description='Predict values with a CatBoost model.\n\n    Args:\n        data_path: Path for the data in CSV format.\n        model_path: Path for the trained model in binary CatBoostModel format.\n        label_column: Column containing the label data.\n        predictions_path: Output path for the predictions.\n\n    Outputs:\n        predictions: Predictions in text format.\n\n    Annotations:\n        author: Alexey Volkov <alexey.volkov@ark-kun.com>')
      _parser.add_argument("--data", dest="data_path", type=str, required=True, default=argparse.SUPPRESS)
      _parser.add_argument("--model", dest="model_path", type=str, required=True, default=argparse.SUPPRESS)
      _parser.add_argument("--label-column", dest="label_column", type=int, required=False, default=argparse.SUPPRESS)
      _parser.add_argument("--predictions", dest="predictions_path", type=_make_parent_dirs_and_return_path, required=True, default=argparse.SUPPRESS)
      _parsed_args = vars(_parser.parse_args())

      _outputs = catboost_predict_values(**_parsed_args)
    args:
    - --data
    - {inputPath: data}
    - --model
    - {inputPath: model}
    - if:
        cond: {isPresent: label_column}
        then:
        - --label-column
        - {inputValue: label_column}
    - --predictions
    - {outputPath: predictions}
@@ -1,3 +0,0 @@
# Deprecation Warning

The components in this directory have been moved to [components/contrib/CatBoost](https://github.com/kubeflow/pipelines/tree/master/components/contrib/CatBoost). This directory will be removed by the end of 2021.
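Pipelines that load these components by URL need to switch to the new location. A minimal sketch of the change (the exact contrib URL is an assumption inferred from the directory move):

    from kfp import components

    # Old location (deleted by this commit):
    #   .../kubeflow/pipelines/<ref>/components/CatBoost/Train_classifier/from_CSV/component.yaml
    # New location (assumed, based on the contrib move):
    catboost_train_classifier_op = components.load_component_from_url(
        'https://raw.githubusercontent.com/kubeflow/pipelines/master/components/contrib/CatBoost/Train_classifier/from_CSV/component.yaml'
    )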
@@ -1,97 +0,0 @@
from kfp.components import InputPath, OutputPath, create_component_from_func


def catboost_train_classifier(
    training_data_path: InputPath('CSV'),
    model_path: OutputPath('CatBoostModel'),
    starting_model_path: InputPath('CatBoostModel') = None,
    label_column: int = 0,

    loss_function: str = 'Logloss',
    num_iterations: int = 500,
    learning_rate: float = None,
    depth: int = 6,
    random_seed: int = 0,

    cat_features: list = None,
    text_features: list = None,

    additional_training_options: dict = {},
):
    '''Train a CatBoost classifier model.

    Args:
        training_data_path: Path for the training data in CSV format.
        model_path: Output path for the trained model in binary CatBoostModel format.
        starting_model_path: Path for the existing trained model to start from.
        label_column: Column containing the label data.

        loss_function: The metric to use in training and also the selector of the machine learning
            problem to solve. Default = 'Logloss'
        num_iterations: Number of trees to add to the ensemble.
        learning_rate: Step size shrinkage used in update to prevent overfitting.
            Default value is selected automatically for binary classification with other parameters set to default.
            In all other cases default is 0.03.
        depth: Depth of a tree. All trees are the same depth. Default = 6
        random_seed: Random number seed. Default = 0

        cat_features: A list of Categorical features (indices or names).
        text_features: A list of Text features (indices or names).
        additional_training_options: A dictionary with additional options to pass to CatBoostClassifier

    Outputs:
        model: Trained model in binary CatBoostModel format.

    Annotations:
        author: Alexey Volkov <alexey.volkov@ark-kun.com>
    '''
    import tempfile
    from pathlib import Path

    from catboost import CatBoostClassifier, Pool

    column_descriptions = {label_column: 'Label'}
    column_description_path = tempfile.NamedTemporaryFile(delete=False).name
    with open(column_description_path, 'w') as column_description_file:
        for idx, kind in column_descriptions.items():
            column_description_file.write('{}\t{}\n'.format(idx, kind))

    train_data = Pool(
        training_data_path,
        column_description=column_description_path,
        has_header=True,
        delimiter=',',
    )

    model = CatBoostClassifier(
        iterations=num_iterations,
        depth=depth,
        learning_rate=learning_rate,
        loss_function=loss_function,
        random_seed=random_seed,
        verbose=True,
        **additional_training_options,
    )

    model.fit(
        train_data,
        cat_features=cat_features,
        text_features=text_features,
        init_model=starting_model_path,
        #verbose=False,
        #plot=True,
    )
    Path(model_path).parent.mkdir(parents=True, exist_ok=True)
    model.save_model(model_path)


if __name__ == '__main__':
    catboost_train_classifier_op = create_component_from_func(
        catboost_train_classifier,
        output_component_file='component.yaml',
        base_image='python:3.7',
        packages_to_install=['catboost==0.23'],
        annotations={
            "author": "Alexey Volkov <alexey.volkov@ark-kun.com>",
            "canonical_location": "https://raw.githubusercontent.com/Ark-kun/pipeline_components/master/components/CatBoost/Train_classifier/from_CSV/component.yaml",
        },
    )
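Because `additional_training_options` is unpacked into the CatBoostClassifier constructor, any constructor keyword can be passed through it without changing the component interface. A local sketch with hypothetical paths:

    # Hypothetical paths; train.csv needs a header row with the label in column 0.
    catboost_train_classifier(
        training_data_path='train.csv',
        model_path='model.cbm',
        num_iterations=100,
        additional_training_options={
            'l2_leaf_reg': 5,            # any CatBoostClassifier constructor keyword works here
            'bagging_temperature': 0.5,
        },
    )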
@@ -1,220 +0,0 @@
name: Catboost train classifier
description: |-
  Train a CatBoost classifier model.

  Args:
      training_data_path: Path for the training data in CSV format.
      model_path: Output path for the trained model in binary CatBoostModel format.
      starting_model_path: Path for the existing trained model to start from.
      label_column: Column containing the label data.

      loss_function: The metric to use in training and also the selector of the machine learning
          problem to solve. Default = 'Logloss'
      num_iterations: Number of trees to add to the ensemble.
      learning_rate: Step size shrinkage used in update to prevent overfitting.
          Default value is selected automatically for binary classification with other parameters set to default.
          In all other cases default is 0.03.
      depth: Depth of a tree. All trees are the same depth. Default = 6
      random_seed: Random number seed. Default = 0

      cat_features: A list of Categorical features (indices or names).
      text_features: A list of Text features (indices or names).
      additional_training_options: A dictionary with additional options to pass to CatBoostClassifier

  Outputs:
      model: Trained model in binary CatBoostModel format.

  Annotations:
      author: Alexey Volkov <alexey.volkov@ark-kun.com>
inputs:
- {name: training_data, type: CSV}
- {name: starting_model, type: CatBoostModel, optional: true}
- {name: label_column, type: Integer, default: '0', optional: true}
- {name: loss_function, type: String, default: Logloss, optional: true}
- {name: num_iterations, type: Integer, default: '500', optional: true}
- {name: learning_rate, type: Float, optional: true}
- {name: depth, type: Integer, default: '6', optional: true}
- {name: random_seed, type: Integer, default: '0', optional: true}
- {name: cat_features, type: JsonArray, optional: true}
- {name: text_features, type: JsonArray, optional: true}
- {name: additional_training_options, type: JsonObject, default: '{}', optional: true}
outputs:
- {name: model, type: CatBoostModel}
metadata:
  annotations:
    author: Alexey Volkov <alexey.volkov@ark-kun.com>
    canonical_location: 'https://raw.githubusercontent.com/Ark-kun/pipeline_components/master/components/CatBoost/Train_classifier/from_CSV/component.yaml'
implementation:
  container:
    image: python:3.7
    command:
    - sh
    - -c
    - (PIP_DISABLE_PIP_VERSION_CHECK=1 python3 -m pip install --quiet --no-warn-script-location
      'catboost==0.23' || PIP_DISABLE_PIP_VERSION_CHECK=1 python3 -m pip install --quiet
      --no-warn-script-location 'catboost==0.23' --user) && "$0" "$@"
    - python3
    - -u
    - -c
    - |
      def _make_parent_dirs_and_return_path(file_path: str):
          import os
          os.makedirs(os.path.dirname(file_path), exist_ok=True)
          return file_path

      def catboost_train_classifier(
          training_data_path,
          model_path,
          starting_model_path = None,
          label_column = 0,

          loss_function = 'Logloss',
          num_iterations = 500,
          learning_rate = None,
          depth = 6,
          random_seed = 0,

          cat_features = None,
          text_features = None,

          additional_training_options = {},
      ):
          '''Train a CatBoost classifier model.

          Args:
              training_data_path: Path for the training data in CSV format.
              model_path: Output path for the trained model in binary CatBoostModel format.
              starting_model_path: Path for the existing trained model to start from.
              label_column: Column containing the label data.

              loss_function: The metric to use in training and also the selector of the machine learning
                  problem to solve. Default = 'Logloss'
              num_iterations: Number of trees to add to the ensemble.
              learning_rate: Step size shrinkage used in update to prevent overfitting.
                  Default value is selected automatically for binary classification with other parameters set to default.
                  In all other cases default is 0.03.
              depth: Depth of a tree. All trees are the same depth. Default = 6
              random_seed: Random number seed. Default = 0

              cat_features: A list of Categorical features (indices or names).
              text_features: A list of Text features (indices or names).
              additional_training_options: A dictionary with additional options to pass to CatBoostClassifier

          Outputs:
              model: Trained model in binary CatBoostModel format.

          Annotations:
              author: Alexey Volkov <alexey.volkov@ark-kun.com>
          '''
          import tempfile
          from pathlib import Path

          from catboost import CatBoostClassifier, Pool

          column_descriptions = {label_column: 'Label'}
          column_description_path = tempfile.NamedTemporaryFile(delete=False).name
          with open(column_description_path, 'w') as column_description_file:
              for idx, kind in column_descriptions.items():
                  column_description_file.write('{}\t{}\n'.format(idx, kind))

          train_data = Pool(
              training_data_path,
              column_description=column_description_path,
              has_header=True,
              delimiter=',',
          )

          model = CatBoostClassifier(
              iterations=num_iterations,
              depth=depth,
              learning_rate=learning_rate,
              loss_function=loss_function,
              random_seed=random_seed,
              verbose=True,
              **additional_training_options,
          )

          model.fit(
              train_data,
              cat_features=cat_features,
              text_features=text_features,
              init_model=starting_model_path,
              #verbose=False,
              #plot=True,
          )
          Path(model_path).parent.mkdir(parents=True, exist_ok=True)
          model.save_model(model_path)

      import json
      import argparse
      _parser = argparse.ArgumentParser(prog='Catboost train classifier', description="Train a CatBoost classifier model.\n\n    Args:\n        training_data_path: Path for the training data in CSV format.\n        model_path: Output path for the trained model in binary CatBoostModel format.\n        starting_model_path: Path for the existing trained model to start from.\n        label_column: Column containing the label data.\n\n        loss_function: The metric to use in training and also the selector of the machine learning\n            problem to solve. Default = 'Logloss'\n        num_iterations: Number of trees to add to the ensemble.\n        learning_rate: Step size shrinkage used in update to prevent overfitting.\n            Default value is selected automatically for binary classification with other parameters set to default.\n            In all other cases default is 0.03.\n        depth: Depth of a tree. All trees are the same depth. Default = 6\n        random_seed: Random number seed. Default = 0\n\n        cat_features: A list of Categorical features (indices or names).\n        text_features: A list of Text features (indices or names).\n        additional_training_options: A dictionary with additional options to pass to CatBoostClassifier\n\n    Outputs:\n        model: Trained model in binary CatBoostModel format.\n\n    Annotations:\n        author: Alexey Volkov <alexey.volkov@ark-kun.com>")
      _parser.add_argument("--training-data", dest="training_data_path", type=str, required=True, default=argparse.SUPPRESS)
      _parser.add_argument("--starting-model", dest="starting_model_path", type=str, required=False, default=argparse.SUPPRESS)
      _parser.add_argument("--label-column", dest="label_column", type=int, required=False, default=argparse.SUPPRESS)
      _parser.add_argument("--loss-function", dest="loss_function", type=str, required=False, default=argparse.SUPPRESS)
      _parser.add_argument("--num-iterations", dest="num_iterations", type=int, required=False, default=argparse.SUPPRESS)
      _parser.add_argument("--learning-rate", dest="learning_rate", type=float, required=False, default=argparse.SUPPRESS)
      _parser.add_argument("--depth", dest="depth", type=int, required=False, default=argparse.SUPPRESS)
      _parser.add_argument("--random-seed", dest="random_seed", type=int, required=False, default=argparse.SUPPRESS)
      _parser.add_argument("--cat-features", dest="cat_features", type=json.loads, required=False, default=argparse.SUPPRESS)
      _parser.add_argument("--text-features", dest="text_features", type=json.loads, required=False, default=argparse.SUPPRESS)
      _parser.add_argument("--additional-training-options", dest="additional_training_options", type=json.loads, required=False, default=argparse.SUPPRESS)
      _parser.add_argument("--model", dest="model_path", type=_make_parent_dirs_and_return_path, required=True, default=argparse.SUPPRESS)
      _parsed_args = vars(_parser.parse_args())

      _outputs = catboost_train_classifier(**_parsed_args)
    args:
    - --training-data
    - {inputPath: training_data}
    - if:
        cond: {isPresent: starting_model}
        then:
        - --starting-model
        - {inputPath: starting_model}
    - if:
        cond: {isPresent: label_column}
        then:
        - --label-column
        - {inputValue: label_column}
    - if:
        cond: {isPresent: loss_function}
        then:
        - --loss-function
        - {inputValue: loss_function}
    - if:
        cond: {isPresent: num_iterations}
        then:
        - --num-iterations
        - {inputValue: num_iterations}
    - if:
        cond: {isPresent: learning_rate}
        then:
        - --learning-rate
        - {inputValue: learning_rate}
    - if:
        cond: {isPresent: depth}
        then:
        - --depth
        - {inputValue: depth}
    - if:
        cond: {isPresent: random_seed}
        then:
        - --random-seed
        - {inputValue: random_seed}
    - if:
        cond: {isPresent: cat_features}
        then:
        - --cat-features
        - {inputValue: cat_features}
    - if:
        cond: {isPresent: text_features}
        then:
        - --text-features
        - {inputValue: text_features}
    - if:
        cond: {isPresent: additional_training_options}
        then:
        - --additional-training-options
        - {inputValue: additional_training_options}
    - --model
    - {outputPath: model}
@@ -1,95 +0,0 @@
from kfp.components import InputPath, OutputPath, create_component_from_func


def catboost_train_regression(
    training_data_path: InputPath('CSV'),
    model_path: OutputPath('CatBoostModel'),
    starting_model_path: InputPath('CatBoostModel') = None,
    label_column: int = 0,

    loss_function: str = 'RMSE',
    num_iterations: int = 500,
    learning_rate: float = None,
    depth: int = 6,
    random_seed: int = 0,

    cat_features: list = None,

    additional_training_options: dict = {},
):
    '''Train a CatBoost regression model.

    Args:
        training_data_path: Path for the training data in CSV format.
        model_path: Output path for the trained model in binary CatBoostModel format.
        starting_model_path: Path for the existing trained model to start from.
        label_column: Column containing the label data.

        loss_function: The metric to use in training and also the selector of the machine learning
            problem to solve. Default = 'RMSE'. Possible values:
            'RMSE', 'MAE', 'Quantile:alpha=value', 'LogLinQuantile:alpha=value', 'Poisson', 'MAPE', 'Lq:q=value'
        num_iterations: Number of trees to add to the ensemble.
        learning_rate: Step size shrinkage used in update to prevent overfitting.
            Default value is selected automatically for binary classification with other parameters set to default.
            In all other cases default is 0.03.
        depth: Depth of a tree. All trees are the same depth. Default = 6
        random_seed: Random number seed. Default = 0

        cat_features: A list of Categorical features (indices or names).
        additional_training_options: A dictionary with additional options to pass to CatBoostRegressor

    Outputs:
        model: Trained model in binary CatBoostModel format.

    Annotations:
        author: Alexey Volkov <alexey.volkov@ark-kun.com>
    '''
    import tempfile
    from pathlib import Path

    from catboost import CatBoostRegressor, Pool

    column_descriptions = {label_column: 'Label'}
    column_description_path = tempfile.NamedTemporaryFile(delete=False).name
    with open(column_description_path, 'w') as column_description_file:
        for idx, kind in column_descriptions.items():
            column_description_file.write('{}\t{}\n'.format(idx, kind))

    train_data = Pool(
        training_data_path,
        column_description=column_description_path,
        has_header=True,
        delimiter=',',
    )

    model = CatBoostRegressor(
        iterations=num_iterations,
        depth=depth,
        learning_rate=learning_rate,
        loss_function=loss_function,
        random_seed=random_seed,
        verbose=True,
        **additional_training_options,
    )

    model.fit(
        train_data,
        cat_features=cat_features,
        init_model=starting_model_path,
        #verbose=False,
        #plot=True,
    )
    Path(model_path).parent.mkdir(parents=True, exist_ok=True)
    model.save_model(model_path)


if __name__ == '__main__':
    catboost_train_regression_op = create_component_from_func(
        catboost_train_regression,
        output_component_file='component.yaml',
        base_image='python:3.7',
        packages_to_install=['catboost==0.23'],
        annotations={
            "author": "Alexey Volkov <alexey.volkov@ark-kun.com>",
            "canonical_location": "https://raw.githubusercontent.com/Ark-kun/pipeline_components/master/components/CatBoost/Train_regression/from_CSV/component.yaml",
        },
    )
@@ -1,211 +0,0 @@
name: Catboost train regression
description: |-
  Train a CatBoost regression model.

  Args:
      training_data_path: Path for the training data in CSV format.
      model_path: Output path for the trained model in binary CatBoostModel format.
      starting_model_path: Path for the existing trained model to start from.
      label_column: Column containing the label data.

      loss_function: The metric to use in training and also the selector of the machine learning
          problem to solve. Default = 'RMSE'. Possible values:
          'RMSE', 'MAE', 'Quantile:alpha=value', 'LogLinQuantile:alpha=value', 'Poisson', 'MAPE', 'Lq:q=value'
      num_iterations: Number of trees to add to the ensemble.
      learning_rate: Step size shrinkage used in update to prevent overfitting.
          Default value is selected automatically for binary classification with other parameters set to default.
          In all other cases default is 0.03.
      depth: Depth of a tree. All trees are the same depth. Default = 6
      random_seed: Random number seed. Default = 0

      cat_features: A list of Categorical features (indices or names).
      additional_training_options: A dictionary with additional options to pass to CatBoostRegressor

  Outputs:
      model: Trained model in binary CatBoostModel format.

  Annotations:
      author: Alexey Volkov <alexey.volkov@ark-kun.com>
inputs:
- {name: training_data, type: CSV}
- {name: starting_model, type: CatBoostModel, optional: true}
- {name: label_column, type: Integer, default: '0', optional: true}
- {name: loss_function, type: String, default: RMSE, optional: true}
- {name: num_iterations, type: Integer, default: '500', optional: true}
- {name: learning_rate, type: Float, optional: true}
- {name: depth, type: Integer, default: '6', optional: true}
- {name: random_seed, type: Integer, default: '0', optional: true}
- {name: cat_features, type: JsonArray, optional: true}
- {name: additional_training_options, type: JsonObject, default: '{}', optional: true}
outputs:
- {name: model, type: CatBoostModel}
metadata:
  annotations:
    author: Alexey Volkov <alexey.volkov@ark-kun.com>
    canonical_location: 'https://raw.githubusercontent.com/Ark-kun/pipeline_components/master/components/CatBoost/Train_regression/from_CSV/component.yaml'
implementation:
  container:
    image: python:3.7
    command:
    - sh
    - -c
    - (PIP_DISABLE_PIP_VERSION_CHECK=1 python3 -m pip install --quiet --no-warn-script-location
      'catboost==0.23' || PIP_DISABLE_PIP_VERSION_CHECK=1 python3 -m pip install --quiet
      --no-warn-script-location 'catboost==0.23' --user) && "$0" "$@"
    - python3
    - -u
    - -c
    - |
      def _make_parent_dirs_and_return_path(file_path: str):
          import os
          os.makedirs(os.path.dirname(file_path), exist_ok=True)
          return file_path

      def catboost_train_regression(
          training_data_path,
          model_path,
          starting_model_path = None,
          label_column = 0,

          loss_function = 'RMSE',
          num_iterations = 500,
          learning_rate = None,
          depth = 6,
          random_seed = 0,

          cat_features = None,

          additional_training_options = {},
      ):
          '''Train a CatBoost regression model.

          Args:
              training_data_path: Path for the training data in CSV format.
              model_path: Output path for the trained model in binary CatBoostModel format.
              starting_model_path: Path for the existing trained model to start from.
              label_column: Column containing the label data.

              loss_function: The metric to use in training and also the selector of the machine learning
                  problem to solve. Default = 'RMSE'. Possible values:
                  'RMSE', 'MAE', 'Quantile:alpha=value', 'LogLinQuantile:alpha=value', 'Poisson', 'MAPE', 'Lq:q=value'
              num_iterations: Number of trees to add to the ensemble.
              learning_rate: Step size shrinkage used in update to prevent overfitting.
                  Default value is selected automatically for binary classification with other parameters set to default.
                  In all other cases default is 0.03.
              depth: Depth of a tree. All trees are the same depth. Default = 6
              random_seed: Random number seed. Default = 0

              cat_features: A list of Categorical features (indices or names).
              additional_training_options: A dictionary with additional options to pass to CatBoostRegressor

          Outputs:
              model: Trained model in binary CatBoostModel format.

          Annotations:
              author: Alexey Volkov <alexey.volkov@ark-kun.com>
          '''
          import tempfile
          from pathlib import Path

          from catboost import CatBoostRegressor, Pool

          column_descriptions = {label_column: 'Label'}
          column_description_path = tempfile.NamedTemporaryFile(delete=False).name
          with open(column_description_path, 'w') as column_description_file:
              for idx, kind in column_descriptions.items():
                  column_description_file.write('{}\t{}\n'.format(idx, kind))

          train_data = Pool(
              training_data_path,
              column_description=column_description_path,
              has_header=True,
              delimiter=',',
          )

          model = CatBoostRegressor(
              iterations=num_iterations,
              depth=depth,
              learning_rate=learning_rate,
              loss_function=loss_function,
              random_seed=random_seed,
              verbose=True,
              **additional_training_options,
          )

          model.fit(
              train_data,
              cat_features=cat_features,
              init_model=starting_model_path,
              #verbose=False,
              #plot=True,
          )
          Path(model_path).parent.mkdir(parents=True, exist_ok=True)
          model.save_model(model_path)

      import json
      import argparse
      _parser = argparse.ArgumentParser(prog='Catboost train regression', description="Train a CatBoost regression model.\n\n    Args:\n        training_data_path: Path for the training data in CSV format.\n        model_path: Output path for the trained model in binary CatBoostModel format.\n        starting_model_path: Path for the existing trained model to start from.\n        label_column: Column containing the label data.\n\n        loss_function: The metric to use in training and also the selector of the machine learning\n            problem to solve. Default = 'RMSE'. Possible values:\n            'RMSE', 'MAE', 'Quantile:alpha=value', 'LogLinQuantile:alpha=value', 'Poisson', 'MAPE', 'Lq:q=value'\n        num_iterations: Number of trees to add to the ensemble.\n        learning_rate: Step size shrinkage used in update to prevent overfitting.\n            Default value is selected automatically for binary classification with other parameters set to default.\n            In all other cases default is 0.03.\n        depth: Depth of a tree. All trees are the same depth. Default = 6\n        random_seed: Random number seed. Default = 0\n\n        cat_features: A list of Categorical features (indices or names).\n        additional_training_options: A dictionary with additional options to pass to CatBoostRegressor\n\n    Outputs:\n        model: Trained model in binary CatBoostModel format.\n\n    Annotations:\n        author: Alexey Volkov <alexey.volkov@ark-kun.com>")
      _parser.add_argument("--training-data", dest="training_data_path", type=str, required=True, default=argparse.SUPPRESS)
      _parser.add_argument("--starting-model", dest="starting_model_path", type=str, required=False, default=argparse.SUPPRESS)
      _parser.add_argument("--label-column", dest="label_column", type=int, required=False, default=argparse.SUPPRESS)
      _parser.add_argument("--loss-function", dest="loss_function", type=str, required=False, default=argparse.SUPPRESS)
      _parser.add_argument("--num-iterations", dest="num_iterations", type=int, required=False, default=argparse.SUPPRESS)
      _parser.add_argument("--learning-rate", dest="learning_rate", type=float, required=False, default=argparse.SUPPRESS)
      _parser.add_argument("--depth", dest="depth", type=int, required=False, default=argparse.SUPPRESS)
      _parser.add_argument("--random-seed", dest="random_seed", type=int, required=False, default=argparse.SUPPRESS)
      _parser.add_argument("--cat-features", dest="cat_features", type=json.loads, required=False, default=argparse.SUPPRESS)
      _parser.add_argument("--additional-training-options", dest="additional_training_options", type=json.loads, required=False, default=argparse.SUPPRESS)
      _parser.add_argument("--model", dest="model_path", type=_make_parent_dirs_and_return_path, required=True, default=argparse.SUPPRESS)
      _parsed_args = vars(_parser.parse_args())

      _outputs = catboost_train_regression(**_parsed_args)
    args:
    - --training-data
    - {inputPath: training_data}
    - if:
        cond: {isPresent: starting_model}
        then:
        - --starting-model
        - {inputPath: starting_model}
    - if:
        cond: {isPresent: label_column}
        then:
        - --label-column
        - {inputValue: label_column}
    - if:
        cond: {isPresent: loss_function}
        then:
        - --loss-function
        - {inputValue: loss_function}
    - if:
        cond: {isPresent: num_iterations}
        then:
        - --num-iterations
        - {inputValue: num_iterations}
    - if:
        cond: {isPresent: learning_rate}
        then:
        - --learning-rate
        - {inputValue: learning_rate}
    - if:
        cond: {isPresent: depth}
        then:
        - --depth
        - {inputValue: depth}
    - if:
        cond: {isPresent: random_seed}
        then:
        - --random-seed
        - {inputValue: random_seed}
    - if:
        cond: {isPresent: cat_features}
        then:
        - --cat-features
        - {inputValue: cat_features}
    - if:
        cond: {isPresent: additional_training_options}
        then:
        - --additional-training-options
        - {inputValue: additional_training_options}
    - --model
    - {outputPath: model}
@@ -1,76 +0,0 @@
import kfp
from kfp import components


chicago_taxi_dataset_op = components.load_component_from_url('https://raw.githubusercontent.com/kubeflow/pipelines/e3337b8bdcd63636934954e592d4b32c95b49129/components/datasets/Chicago%20Taxi/component.yaml')
pandas_transform_csv_op = components.load_component_from_url('https://raw.githubusercontent.com/kubeflow/pipelines/e69a6694/components/pandas/Transform_DataFrame/in_CSV_format/component.yaml')

catboost_train_classifier_op = components.load_component_from_url('https://raw.githubusercontent.com/kubeflow/pipelines/f97ad2/components/CatBoost/Train_classifier/from_CSV/component.yaml')
catboost_train_regression_op = components.load_component_from_url('https://raw.githubusercontent.com/kubeflow/pipelines/f97ad2/components/CatBoost/Train_regression/from_CSV/component.yaml')
catboost_predict_classes_op = components.load_component_from_url('https://raw.githubusercontent.com/kubeflow/pipelines/f97ad2/components/CatBoost/Predict_classes/from_CSV/component.yaml')
catboost_predict_values_op = components.load_component_from_url('https://raw.githubusercontent.com/kubeflow/pipelines/f97ad2/components/CatBoost/Predict_values/from_CSV/component.yaml')
catboost_predict_class_probabilities_op = components.load_component_from_url('https://raw.githubusercontent.com/kubeflow/pipelines/f97ad2/components/CatBoost/Predict_class_probabilities/from_CSV/component.yaml')
catboost_to_apple_op = components.load_component_from_url('https://raw.githubusercontent.com/kubeflow/pipelines/f97ad2/components/CatBoost/convert_CatBoostModel_to_AppleCoreMLModel/component.yaml')
catboost_to_onnx_op = components.load_component_from_url('https://raw.githubusercontent.com/kubeflow/pipelines/f97ad2/components/CatBoost/convert_CatBoostModel_to_ONNX/component.yaml')


def catboost_pipeline():
    training_data_in_csv = chicago_taxi_dataset_op(
        where='trip_start_timestamp >= "2019-01-01" AND trip_start_timestamp < "2019-02-01"',
        select='tips,trip_seconds,trip_miles,pickup_community_area,dropoff_community_area,fare,tolls,extras,trip_total',
        limit=10000,
    ).output

    training_data_for_classification_in_csv = pandas_transform_csv_op(
        table=training_data_in_csv,
        transform_code='''df.insert(0, "was_tipped", df["tips"] > 0); del df["tips"]''',
    ).output

    catboost_train_regression_task = catboost_train_regression_op(
        training_data=training_data_in_csv,
        loss_function='RMSE',
        label_column=0,
        num_iterations=200,
    )

    regression_model = catboost_train_regression_task.outputs['model']

    catboost_train_classifier_task = catboost_train_classifier_op(
        training_data=training_data_for_classification_in_csv,
        label_column=0,
        num_iterations=200,
    )

    classification_model = catboost_train_classifier_task.outputs['model']

    evaluation_data_for_regression_in_csv = training_data_in_csv
    evaluation_data_for_classification_in_csv = training_data_for_classification_in_csv

    catboost_predict_values_op(
        data=evaluation_data_for_regression_in_csv,
        model=regression_model,
        label_column=0,
    )

    catboost_predict_classes_op(
        data=evaluation_data_for_classification_in_csv,
        model=classification_model,
        label_column=0,
    )

    catboost_predict_class_probabilities_op(
        data=evaluation_data_for_classification_in_csv,
        model=classification_model,
        label_column=0,
    )

    catboost_to_apple_op(regression_model)
    catboost_to_apple_op(classification_model)

    catboost_to_onnx_op(regression_model)
    catboost_to_onnx_op(classification_model)


if __name__ == '__main__':
    kfp_endpoint = None
    kfp.Client(host=kfp_endpoint).create_run_from_pipeline_func(catboost_pipeline, arguments={})
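Instead of submitting a run directly, the same pipeline function can be compiled to a workflow file with the KFP v1 SDK compiler. A minimal sketch (the output file name is arbitrary):

    import kfp

    kfp.compiler.Compiler().compile(catboost_pipeline, 'catboost_pipeline.yaml')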
@@ -1,41 +0,0 @@
from kfp.components import InputPath, OutputPath, create_component_from_func


def convert_CatBoostModel_to_AppleCoreMLModel(
    model_path: InputPath('CatBoostModel'),
    converted_model_path: OutputPath('AppleCoreMLModel'),
):
    '''Convert CatBoost model to Apple CoreML format.

    Args:
        model_path: Path of a trained model in binary CatBoost model format.
        converted_model_path: Output path for the converted model.

    Outputs:
        converted_model: Model in Apple CoreML format.

    Annotations:
        author: Alexey Volkov <alexey.volkov@ark-kun.com>
    '''
    from catboost import CatBoost

    model = CatBoost()
    model.load_model(model_path)
    model.save_model(
        converted_model_path,
        format="coreml",
        # export_parameters={'prediction_type': 'probability'},
        # export_parameters={'prediction_type': 'raw'},
    )


if __name__ == '__main__':
    create_component_from_func(
        convert_CatBoostModel_to_AppleCoreMLModel,
        output_component_file='component.yaml',
        base_image='python:3.7',
        packages_to_install=['catboost==0.22'],
        annotations={
            "author": "Alexey Volkov <alexey.volkov@ark-kun.com>",
            "canonical_location": "https://raw.githubusercontent.com/Ark-kun/pipeline_components/master/components/CatBoost/convert_CatBoostModel_to_AppleCoreMLModel/component.yaml",
        },
    )
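The produced artifact can be inspected with Apple's coremltools package, which is not a dependency of the component itself. A sketch with a hypothetical path:

    import coremltools

    # Load the converted model and print its input/output description (hypothetical path).
    mlmodel = coremltools.models.MLModel('converted_model.mlmodel')
    print(mlmodel.get_spec().description)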
@ -1,78 +0,0 @@
|
|||
name: Convert CatBoostModel to AppleCoreMLModel
description: |-
  Convert CatBoost model to Apple CoreML format.

  Args:
      model_path: Path of a trained model in binary CatBoost model format.
      converted_model_path: Output path for the converted model.

  Outputs:
      converted_model: Model in Apple CoreML format.

  Annotations:
      author: Alexey Volkov <alexey.volkov@ark-kun.com>
inputs:
- {name: model, type: CatBoostModel}
outputs:
- {name: converted_model, type: AppleCoreMLModel}
metadata:
  annotations:
    author: Alexey Volkov <alexey.volkov@ark-kun.com>
    canonical_location: 'https://raw.githubusercontent.com/Ark-kun/pipeline_components/master/components/CatBoost/convert_CatBoostModel_to_AppleCoreMLModel/component.yaml'
implementation:
  container:
    image: python:3.7
    command:
    - sh
    - -c
    - (PIP_DISABLE_PIP_VERSION_CHECK=1 python3 -m pip install --quiet --no-warn-script-location
      'catboost==0.22' || PIP_DISABLE_PIP_VERSION_CHECK=1 python3 -m pip install --quiet
      --no-warn-script-location 'catboost==0.22' --user) && "$0" "$@"
    - python3
    - -u
    - -c
    - |
      def _make_parent_dirs_and_return_path(file_path: str):
          import os
          os.makedirs(os.path.dirname(file_path), exist_ok=True)
          return file_path

      def convert_CatBoostModel_to_AppleCoreMLModel(
          model_path,
          converted_model_path,
      ):
          '''Convert CatBoost model to Apple CoreML format.

          Args:
              model_path: Path of a trained model in binary CatBoost model format.
              converted_model_path: Output path for the converted model.

          Outputs:
              converted_model: Model in Apple CoreML format.

          Annotations:
              author: Alexey Volkov <alexey.volkov@ark-kun.com>
          '''
          from catboost import CatBoost

          model = CatBoost()
          model.load_model(model_path)
          model.save_model(
              converted_model_path,
              format="coreml",
              # export_parameters={'prediction_type': 'probability'},
              # export_parameters={'prediction_type': 'raw'},
          )

      import argparse
      _parser = argparse.ArgumentParser(prog='Convert CatBoostModel to AppleCoreMLModel', description='Convert CatBoost model to Apple CoreML format.\n\n    Args:\n        model_path: Path of a trained model in binary CatBoost model format.\n        converted_model_path: Output path for the converted model.\n\n    Outputs:\n        converted_model: Model in Apple CoreML format.\n\n    Annotations:\n        author: Alexey Volkov <alexey.volkov@ark-kun.com>')
      _parser.add_argument("--model", dest="model_path", type=str, required=True, default=argparse.SUPPRESS)
      _parser.add_argument("--converted-model", dest="converted_model_path", type=_make_parent_dirs_and_return_path, required=True, default=argparse.SUPPRESS)
      _parsed_args = vars(_parser.parse_args())

      _outputs = convert_CatBoostModel_to_AppleCoreMLModel(**_parsed_args)
    args:
    - --model
    - {inputPath: model}
    - --converted-model
    - {outputPath: converted_model}
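The generated `component.yaml` above is what pipeline authors actually consume. A minimal, hedged sketch of loading it with the KFP SDK, using the `canonical_location` URL from the metadata (assumes a `kfp` 1.x installation and network access):

```python
from kfp import components

convert_to_coreml_op = components.load_component_from_url(
    'https://raw.githubusercontent.com/Ark-kun/pipeline_components/master/'
    'components/CatBoost/convert_CatBoostModel_to_AppleCoreMLModel/component.yaml'
)

# The returned factory carries the parsed spec, so the declared
# `model` input and `converted_model` output can be inspected before
# the component is wired into a pipeline.
print(convert_to_coreml_op.component_spec.inputs)
print(convert_to_coreml_op.component_spec.outputs)
```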
@@ -1,36 +0,0 @@
from kfp.components import InputPath, OutputPath, create_component_from_func


def convert_CatBoostModel_to_ONNX(
    model_path: InputPath('CatBoostModel'),
    converted_model_path: OutputPath('ONNX'),
):
    '''Convert CatBoost model to ONNX format.

    Args:
        model_path: Path of a trained model in binary CatBoost model format.
        converted_model_path: Output path for the converted model.

    Outputs:
        converted_model: Model in ONNX format.

    Annotations:
        author: Alexey Volkov <alexey.volkov@ark-kun.com>
    '''
    from catboost import CatBoost

    model = CatBoost()
    model.load_model(model_path)
    model.save_model(converted_model_path, format="onnx")


if __name__ == '__main__':
    create_component_from_func(
        convert_CatBoostModel_to_ONNX,
        output_component_file='component.yaml',
        base_image='python:3.7',
        packages_to_install=['catboost==0.22'],
        annotations={
            "author": "Alexey Volkov <alexey.volkov@ark-kun.com>",
            "canonical_location": "https://raw.githubusercontent.com/Ark-kun/pipeline_components/master/components/CatBoost/convert_CatBoostModel_to_ONNX/component.yaml",
        },
    )
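As a hedged follow-up check, the converted file can be probed with `onnxruntime` outside the pipeline. The session and run calls below are standard `onnxruntime` API; the file name and feature count are placeholders (assumes `pip install onnxruntime numpy`):

```python
import numpy
import onnxruntime

sess = onnxruntime.InferenceSession('model.onnx')
inp = sess.get_inputs()[0]
# Feed one all-zeros row; the feature count (4) is a placeholder and
# must match the model that was actually exported by the component.
outputs = sess.run(None, {inp.name: numpy.zeros((1, 4), dtype=numpy.float32)})
print(outputs)
```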
@@ -1,73 +0,0 @@
name: Convert CatBoostModel to ONNX
description: |-
  Convert CatBoost model to ONNX format.

  Args:
      model_path: Path of a trained model in binary CatBoost model format.
      converted_model_path: Output path for the converted model.

  Outputs:
      converted_model: Model in ONNX format.

  Annotations:
      author: Alexey Volkov <alexey.volkov@ark-kun.com>
inputs:
- {name: model, type: CatBoostModel}
outputs:
- {name: converted_model, type: ONNX}
metadata:
  annotations:
    author: Alexey Volkov <alexey.volkov@ark-kun.com>
    canonical_location: 'https://raw.githubusercontent.com/Ark-kun/pipeline_components/master/components/CatBoost/convert_CatBoostModel_to_ONNX/component.yaml'
implementation:
  container:
    image: python:3.7
    command:
    - sh
    - -c
    - (PIP_DISABLE_PIP_VERSION_CHECK=1 python3 -m pip install --quiet --no-warn-script-location
      'catboost==0.22' || PIP_DISABLE_PIP_VERSION_CHECK=1 python3 -m pip install --quiet
      --no-warn-script-location 'catboost==0.22' --user) && "$0" "$@"
    - python3
    - -u
    - -c
    - |
      def _make_parent_dirs_and_return_path(file_path: str):
          import os
          os.makedirs(os.path.dirname(file_path), exist_ok=True)
          return file_path

      def convert_CatBoostModel_to_ONNX(
          model_path,
          converted_model_path,
      ):
          '''Convert CatBoost model to ONNX format.

          Args:
              model_path: Path of a trained model in binary CatBoost model format.
              converted_model_path: Output path for the converted model.

          Outputs:
              converted_model: Model in ONNX format.

          Annotations:
              author: Alexey Volkov <alexey.volkov@ark-kun.com>
          '''
          from catboost import CatBoost

          model = CatBoost()
          model.load_model(model_path)
          model.save_model(converted_model_path, format="onnx")

      import argparse
      _parser = argparse.ArgumentParser(prog='Convert CatBoostModel to ONNX', description='Convert CatBoost model to ONNX format.\n\n    Args:\n        model_path: Path of a trained model in binary CatBoost model format.\n        converted_model_path: Output path for the converted model.\n\n    Outputs:\n        converted_model: Model in ONNX format.\n\n    Annotations:\n        author: Alexey Volkov <alexey.volkov@ark-kun.com>')
      _parser.add_argument("--model", dest="model_path", type=str, required=True, default=argparse.SUPPRESS)
      _parser.add_argument("--converted-model", dest="converted_model_path", type=_make_parent_dirs_and_return_path, required=True, default=argparse.SUPPRESS)
      _parsed_args = vars(_parser.parse_args())

      _outputs = convert_CatBoostModel_to_ONNX(**_parsed_args)
    args:
    - --model
    - {inputPath: model}
    - --converted-model
    - {outputPath: converted_model}
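Putting the pieces together, here is a hedged end-to-end sketch: a small inline training component (illustrative only, not part of this repository) feeds the ONNX converter loaded from its canonical URL, and the pipeline is compiled locally without a cluster:

```python
import kfp
from kfp import components
from kfp.components import OutputPath, create_component_from_func


def train_toy_model(model_path: OutputPath('CatBoostModel')):
    # Stand-in training step that writes a binary CatBoost model.
    from catboost import CatBoostClassifier
    model = CatBoostClassifier(iterations=5, verbose=False)
    model.fit([[0, 0], [0, 1], [1, 0], [1, 1]], [0, 1, 1, 0])
    model.save_model(model_path)


train_toy_model_op = create_component_from_func(
    train_toy_model,
    base_image='python:3.7',
    packages_to_install=['catboost==0.22'],
)

convert_to_onnx_op = components.load_component_from_url(
    'https://raw.githubusercontent.com/Ark-kun/pipeline_components/master/'
    'components/CatBoost/convert_CatBoostModel_to_ONNX/component.yaml'
)


@kfp.dsl.pipeline(name='Export CatBoost model to ONNX')
def export_pipeline():
    train_task = train_toy_model_op()
    # kfp strips the `_path` suffix, so the output is named `model`.
    convert_to_onnx_op(model=train_task.outputs['model'])


# Compile to an Argo workflow spec; no cluster is needed for this step.
kfp.compiler.Compiler().compile(export_pipeline, 'export_pipeline.yaml')
```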
@@ -5,333 +5,3 @@ Components are the building blocks of pipelines.
Component definition files describe a component's interface (inputs and outputs) and implementation (how to call the containerized program).
Users can load components with the KFP SDK, instantiate them, and compose them into a pipeline graph, as in the sketch below.

## Example components

* Python-based: [Train with XGBoost](https://github.com/kubeflow/pipelines/tree/ea94251143f300fafed8950a1b4ba0a6b6065094/components/XGBoost/Train)
* Command-line-based: [Execute notebook](https://github.com/kubeflow/pipelines/blob/329ed48/components/notebooks/Run_notebook_using_papermill/component.yaml)
* Graph-based: [Cross-validate](https://github.com/kubeflow/pipelines/tree/34cb59daaea4f800afae5d968b5efd31eb432291/components/XGBoost/Cross_validation_for_regression/from_CSV)

See how to [build your own components](https://www.kubeflow.org/docs/pipelines/sdk/component-development/).
Also see the tutorials for [data passing for components based on python functions](https://github.com/kubeflow/pipelines/blob/fd5778d/samples/tutorials/Data%20passing%20in%20python%20components.ipynb) and [components based on command-line programs](https://github.com/Ark-kun/kfp_samples/blob/ae1a5b6/2019-10%20Kubeflow%20summit/106%20-%20Creating%20components%20from%20command-line%20programs/106%20-%20Creating%20components%20from%20command-line%20programs.ipynb).
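As a sketch of that workflow, two entries from the index below can be loaded by the raw URLs of their `component.yaml` files and chained. The argument and output names used here are assumptions to verify against each component file:

```python
import kfp
from kfp import components

# Both URLs appear verbatim in the index below.
chicago_taxi_trips_op = components.load_component_from_url(
    'https://raw.githubusercontent.com/kubeflow/pipelines/'
    '0795597562e076437a21745e524b5c960b1edb68/components/datasets/'
    'Chicago_Taxi_Trips/component.yaml'
)
csv_to_parquet_op = components.load_component_from_url(
    'https://raw.githubusercontent.com/kubeflow/pipelines/'
    '0795597562e076437a21745e524b5c960b1edb68/components/_converters/'
    'ApacheParquet/from_CSV/component.yaml'
)


@kfp.dsl.pipeline(name='Example: dataset to Parquet')
def example_pipeline():
    # `.output` is assumed valid because the dataset component declares
    # a single output; the `data` input name is likewise an assumption.
    table = chicago_taxi_trips_op(limit=1000).output
    csv_to_parquet_op(data=table)
```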
## Index of components
/ [_converters](https://github.com/kubeflow/pipelines/tree/0795597562e076437a21745e524b5c960b1edb68/components/_converters) / [ApacheParquet](https://github.com/kubeflow/pipelines/tree/0795597562e076437a21745e524b5c960b1edb68/components/_converters/ApacheParquet) / [from_ApacheArrowFeather](https://github.com/kubeflow/pipelines/tree/0795597562e076437a21745e524b5c960b1edb68/components/_converters/ApacheParquet/from_ApacheArrowFeather) / [Convert apache arrow feather to apache parquet](https://raw.githubusercontent.com/kubeflow/pipelines/0795597562e076437a21745e524b5c960b1edb68/components/_converters/ApacheParquet/from_ApacheArrowFeather/component.yaml)
/ [_converters](https://github.com/kubeflow/pipelines/tree/0795597562e076437a21745e524b5c960b1edb68/components/_converters) / [ApacheParquet](https://github.com/kubeflow/pipelines/tree/0795597562e076437a21745e524b5c960b1edb68/components/_converters/ApacheParquet) / [from_CSV](https://github.com/kubeflow/pipelines/tree/0795597562e076437a21745e524b5c960b1edb68/components/_converters/ApacheParquet/from_CSV) / [Convert csv to apache parquet](https://raw.githubusercontent.com/kubeflow/pipelines/0795597562e076437a21745e524b5c960b1edb68/components/_converters/ApacheParquet/from_CSV/component.yaml)
/ [_converters](https://github.com/kubeflow/pipelines/tree/0795597562e076437a21745e524b5c960b1edb68/components/_converters) / [ApacheParquet](https://github.com/kubeflow/pipelines/tree/0795597562e076437a21745e524b5c960b1edb68/components/_converters/ApacheParquet) / [from_TSV](https://github.com/kubeflow/pipelines/tree/0795597562e076437a21745e524b5c960b1edb68/components/_converters/ApacheParquet/from_TSV) / [Convert tsv to apache parquet](https://raw.githubusercontent.com/kubeflow/pipelines/0795597562e076437a21745e524b5c960b1edb68/components/_converters/ApacheParquet/from_TSV/component.yaml)
/ [_converters](https://github.com/kubeflow/pipelines/tree/0795597562e076437a21745e524b5c960b1edb68/components/_converters) / [ApacheParquet](https://github.com/kubeflow/pipelines/tree/0795597562e076437a21745e524b5c960b1edb68/components/_converters/ApacheParquet) / [to_ApacheArrowFeather](https://github.com/kubeflow/pipelines/tree/0795597562e076437a21745e524b5c960b1edb68/components/_converters/ApacheParquet/to_ApacheArrowFeather) / [Convert apache parquet to apache arrow feather](https://raw.githubusercontent.com/kubeflow/pipelines/0795597562e076437a21745e524b5c960b1edb68/components/_converters/ApacheParquet/to_ApacheArrowFeather/component.yaml)
/ [_converters](https://github.com/kubeflow/pipelines/tree/0795597562e076437a21745e524b5c960b1edb68/components/_converters) / [ApacheParquet](https://github.com/kubeflow/pipelines/tree/0795597562e076437a21745e524b5c960b1edb68/components/_converters/ApacheParquet) / [to_CSV](https://github.com/kubeflow/pipelines/tree/0795597562e076437a21745e524b5c960b1edb68/components/_converters/ApacheParquet/to_CSV) / [Convert apache parquet to csv](https://raw.githubusercontent.com/kubeflow/pipelines/0795597562e076437a21745e524b5c960b1edb68/components/_converters/ApacheParquet/to_CSV/component.yaml)
/ [_converters](https://github.com/kubeflow/pipelines/tree/0795597562e076437a21745e524b5c960b1edb68/components/_converters) / [ApacheParquet](https://github.com/kubeflow/pipelines/tree/0795597562e076437a21745e524b5c960b1edb68/components/_converters/ApacheParquet) / [to_TSV](https://github.com/kubeflow/pipelines/tree/0795597562e076437a21745e524b5c960b1edb68/components/_converters/ApacheParquet/to_TSV) / [Convert apache parquet to tsv](https://raw.githubusercontent.com/kubeflow/pipelines/0795597562e076437a21745e524b5c960b1edb68/components/_converters/ApacheParquet/to_TSV/component.yaml)
/ [_converters](https://github.com/kubeflow/pipelines/tree/0795597562e076437a21745e524b5c960b1edb68/components/_converters) / [KerasModelHdf5](https://github.com/kubeflow/pipelines/tree/0795597562e076437a21745e524b5c960b1edb68/components/_converters/KerasModelHdf5) / [to_TensorflowSavedModel](https://github.com/kubeflow/pipelines/tree/0795597562e076437a21745e524b5c960b1edb68/components/_converters/KerasModelHdf5/to_TensorflowSavedModel) / [Keras convert hdf5 model to tf saved model](https://raw.githubusercontent.com/kubeflow/pipelines/0795597562e076437a21745e524b5c960b1edb68/components/_converters/KerasModelHdf5/to_TensorflowSavedModel/component.yaml)
/ [_converters](https://github.com/kubeflow/pipelines/tree/0795597562e076437a21745e524b5c960b1edb68/components/_converters) / [OnnxModel](https://github.com/kubeflow/pipelines/tree/0795597562e076437a21745e524b5c960b1edb68/components/_converters/OnnxModel) / [from_KerasModelHdf5](https://github.com/kubeflow/pipelines/tree/0795597562e076437a21745e524b5c960b1edb68/components/_converters/OnnxModel/from_KerasModelHdf5) / [To ONNX from Keras HDF5 model](https://raw.githubusercontent.com/kubeflow/pipelines/0795597562e076437a21745e524b5c960b1edb68/components/_converters/OnnxModel/from_KerasModelHdf5/component.yaml)
/ [_converters](https://github.com/kubeflow/pipelines/tree/0795597562e076437a21745e524b5c960b1edb68/components/_converters) / [OnnxModel](https://github.com/kubeflow/pipelines/tree/0795597562e076437a21745e524b5c960b1edb68/components/_converters/OnnxModel) / [from_TensorflowSavedModel](https://github.com/kubeflow/pipelines/tree/0795597562e076437a21745e524b5c960b1edb68/components/_converters/OnnxModel/from_TensorflowSavedModel) / [To ONNX from Tensorflow SavedModel](https://raw.githubusercontent.com/kubeflow/pipelines/0795597562e076437a21745e524b5c960b1edb68/components/_converters/OnnxModel/from_TensorflowSavedModel/component.yaml)
/ [_converters](https://github.com/kubeflow/pipelines/tree/0795597562e076437a21745e524b5c960b1edb68/components/_converters) / [TensorflowJSGraphModel](https://github.com/kubeflow/pipelines/tree/0795597562e076437a21745e524b5c960b1edb68/components/_converters/TensorflowJSGraphModel) / [from_KerasModelHdf5](https://github.com/kubeflow/pipelines/tree/0795597562e076437a21745e524b5c960b1edb68/components/_converters/TensorflowJSGraphModel/from_KerasModelHdf5) / [Convert Keras HDF5 model to Tensorflow JS GraphModel](https://raw.githubusercontent.com/kubeflow/pipelines/0795597562e076437a21745e524b5c960b1edb68/components/_converters/TensorflowJSGraphModel/from_KerasModelHdf5/component.yaml)
/ [_converters](https://github.com/kubeflow/pipelines/tree/0795597562e076437a21745e524b5c960b1edb68/components/_converters) / [TensorflowJSGraphModel](https://github.com/kubeflow/pipelines/tree/0795597562e076437a21745e524b5c960b1edb68/components/_converters/TensorflowJSGraphModel) / [from_TensorflowSavedModel](https://github.com/kubeflow/pipelines/tree/0795597562e076437a21745e524b5c960b1edb68/components/_converters/TensorflowJSGraphModel/from_TensorflowSavedModel) / [Convert Tensorflow SavedModel to Tensorflow JS GraphModel](https://raw.githubusercontent.com/kubeflow/pipelines/0795597562e076437a21745e524b5c960b1edb68/components/_converters/TensorflowJSGraphModel/from_TensorflowSavedModel/component.yaml)
/ [_converters](https://github.com/kubeflow/pipelines/tree/0795597562e076437a21745e524b5c960b1edb68/components/_converters) / [TensorflowJSLayersModel](https://github.com/kubeflow/pipelines/tree/0795597562e076437a21745e524b5c960b1edb68/components/_converters/TensorflowJSLayersModel) / [from_KerasModelHdf5](https://github.com/kubeflow/pipelines/tree/0795597562e076437a21745e524b5c960b1edb68/components/_converters/TensorflowJSLayersModel/from_KerasModelHdf5) / [Convert Keras HDF5 model to Tensorflow JS LayersModel](https://raw.githubusercontent.com/kubeflow/pipelines/0795597562e076437a21745e524b5c960b1edb68/components/_converters/TensorflowJSLayersModel/from_KerasModelHdf5/component.yaml)
/ [_converters](https://github.com/kubeflow/pipelines/tree/0795597562e076437a21745e524b5c960b1edb68/components/_converters) / [TensorflowJSLayersModel](https://github.com/kubeflow/pipelines/tree/0795597562e076437a21745e524b5c960b1edb68/components/_converters/TensorflowJSLayersModel) / [from_TensorflowSavedModel](https://github.com/kubeflow/pipelines/tree/0795597562e076437a21745e524b5c960b1edb68/components/_converters/TensorflowJSLayersModel/from_TensorflowSavedModel) / [Convert Keras SavedModel to Tensorflow JS LayersModel](https://raw.githubusercontent.com/kubeflow/pipelines/0795597562e076437a21745e524b5c960b1edb68/components/_converters/TensorflowJSLayersModel/from_TensorflowSavedModel/component.yaml)
/ [_converters](https://github.com/kubeflow/pipelines/tree/0795597562e076437a21745e524b5c960b1edb68/components/_converters) / [TensorflowLiteModel](https://github.com/kubeflow/pipelines/tree/0795597562e076437a21745e524b5c960b1edb68/components/_converters/TensorflowLiteModel) / [from_KerasModelHdf5](https://github.com/kubeflow/pipelines/tree/0795597562e076437a21745e524b5c960b1edb68/components/_converters/TensorflowLiteModel/from_KerasModelHdf5) / [Convert Keras HDF5 model to Tensorflow Lite model](https://raw.githubusercontent.com/kubeflow/pipelines/0795597562e076437a21745e524b5c960b1edb68/components/_converters/TensorflowLiteModel/from_KerasModelHdf5/component.yaml)
/ [_converters](https://github.com/kubeflow/pipelines/tree/0795597562e076437a21745e524b5c960b1edb68/components/_converters) / [TensorflowLiteModel](https://github.com/kubeflow/pipelines/tree/0795597562e076437a21745e524b5c960b1edb68/components/_converters/TensorflowLiteModel) / [from_TensorflowSavedModel](https://github.com/kubeflow/pipelines/tree/0795597562e076437a21745e524b5c960b1edb68/components/_converters/TensorflowLiteModel/from_TensorflowSavedModel) / [Convert Tensorflow SavedModel to Tensorflow Lite model](https://raw.githubusercontent.com/kubeflow/pipelines/0795597562e076437a21745e524b5c960b1edb68/components/_converters/TensorflowLiteModel/from_TensorflowSavedModel/component.yaml)
/ [aws](https://github.com/kubeflow/pipelines/tree/0795597562e076437a21745e524b5c960b1edb68/components/aws) / [athena](https://github.com/kubeflow/pipelines/tree/0795597562e076437a21745e524b5c960b1edb68/components/aws/athena) / [query](https://github.com/kubeflow/pipelines/tree/0795597562e076437a21745e524b5c960b1edb68/components/aws/athena/query) / [Athena Query](https://raw.githubusercontent.com/kubeflow/pipelines/0795597562e076437a21745e524b5c960b1edb68/components/aws/athena/query/component.yaml)
/ [aws](https://github.com/kubeflow/pipelines/tree/0795597562e076437a21745e524b5c960b1edb68/components/aws) / [emr](https://github.com/kubeflow/pipelines/tree/0795597562e076437a21745e524b5c960b1edb68/components/aws/emr) / [create_cluster](https://github.com/kubeflow/pipelines/tree/0795597562e076437a21745e524b5c960b1edb68/components/aws/emr/create_cluster) / [emr_create_cluster](https://raw.githubusercontent.com/kubeflow/pipelines/0795597562e076437a21745e524b5c960b1edb68/components/aws/emr/create_cluster/component.yaml)
/ [aws](https://github.com/kubeflow/pipelines/tree/0795597562e076437a21745e524b5c960b1edb68/components/aws) / [emr](https://github.com/kubeflow/pipelines/tree/0795597562e076437a21745e524b5c960b1edb68/components/aws/emr) / [delete_cluster](https://github.com/kubeflow/pipelines/tree/0795597562e076437a21745e524b5c960b1edb68/components/aws/emr/delete_cluster) / [emr_delete_cluster](https://raw.githubusercontent.com/kubeflow/pipelines/0795597562e076437a21745e524b5c960b1edb68/components/aws/emr/delete_cluster/component.yaml)
/ [aws](https://github.com/kubeflow/pipelines/tree/0795597562e076437a21745e524b5c960b1edb68/components/aws) / [emr](https://github.com/kubeflow/pipelines/tree/0795597562e076437a21745e524b5c960b1edb68/components/aws/emr) / [submit_pyspark_job](https://github.com/kubeflow/pipelines/tree/0795597562e076437a21745e524b5c960b1edb68/components/aws/emr/submit_pyspark_job) / [emr_submit_pyspark_job](https://raw.githubusercontent.com/kubeflow/pipelines/0795597562e076437a21745e524b5c960b1edb68/components/aws/emr/submit_pyspark_job/component.yaml)
/ [aws](https://github.com/kubeflow/pipelines/tree/0795597562e076437a21745e524b5c960b1edb68/components/aws) / [emr](https://github.com/kubeflow/pipelines/tree/0795597562e076437a21745e524b5c960b1edb68/components/aws/emr) / [submit_spark_job](https://github.com/kubeflow/pipelines/tree/0795597562e076437a21745e524b5c960b1edb68/components/aws/emr/submit_spark_job) / [emr_submit_spark_job](https://raw.githubusercontent.com/kubeflow/pipelines/0795597562e076437a21745e524b5c960b1edb68/components/aws/emr/submit_spark_job/component.yaml)
/ [aws](https://github.com/kubeflow/pipelines/tree/079eea369ae468fd64cfb513ff0392a25d895ca9/components/aws) / [sagemaker](https://github.com/kubeflow/pipelines/tree/079eea369ae468fd64cfb513ff0392a25d895ca9/components/aws/sagemaker) / [batch_transform](https://github.com/kubeflow/pipelines/tree/079eea369ae468fd64cfb513ff0392a25d895ca9/components/aws/sagemaker/batch_transform) / [SageMaker - Batch Transformation](https://raw.githubusercontent.com/kubeflow/pipelines/079eea369ae468fd64cfb513ff0392a25d895ca9/components/aws/sagemaker/batch_transform/component.yaml)
/ [aws](https://github.com/kubeflow/pipelines/tree/079eea369ae468fd64cfb513ff0392a25d895ca9/components/aws) / [sagemaker](https://github.com/kubeflow/pipelines/tree/079eea369ae468fd64cfb513ff0392a25d895ca9/components/aws/sagemaker) / [create_simulation_app](https://github.com/kubeflow/pipelines/tree/079eea369ae468fd64cfb513ff0392a25d895ca9/components/aws/sagemaker/create_simulation_app) / [RoboMaker - Create Simulation Application](https://raw.githubusercontent.com/kubeflow/pipelines/079eea369ae468fd64cfb513ff0392a25d895ca9/components/aws/sagemaker/create_simulation_app/component.yaml)
/ [aws](https://github.com/kubeflow/pipelines/tree/079eea369ae468fd64cfb513ff0392a25d895ca9/components/aws) / [sagemaker](https://github.com/kubeflow/pipelines/tree/079eea369ae468fd64cfb513ff0392a25d895ca9/components/aws/sagemaker) / [delete_simulation_app](https://github.com/kubeflow/pipelines/tree/079eea369ae468fd64cfb513ff0392a25d895ca9/components/aws/sagemaker/delete_simulation_app) / [RoboMaker - Delete Simulation Application](https://raw.githubusercontent.com/kubeflow/pipelines/079eea369ae468fd64cfb513ff0392a25d895ca9/components/aws/sagemaker/delete_simulation_app/component.yaml)
/ [aws](https://github.com/kubeflow/pipelines/tree/079eea369ae468fd64cfb513ff0392a25d895ca9/components/aws) / [sagemaker](https://github.com/kubeflow/pipelines/tree/079eea369ae468fd64cfb513ff0392a25d895ca9/components/aws/sagemaker) / [deploy](https://github.com/kubeflow/pipelines/tree/079eea369ae468fd64cfb513ff0392a25d895ca9/components/aws/sagemaker/deploy) / [SageMaker - Deploy Model](https://raw.githubusercontent.com/kubeflow/pipelines/079eea369ae468fd64cfb513ff0392a25d895ca9/components/aws/sagemaker/deploy/component.yaml)
/ [aws](https://github.com/kubeflow/pipelines/tree/079eea369ae468fd64cfb513ff0392a25d895ca9/components/aws) / [sagemaker](https://github.com/kubeflow/pipelines/tree/079eea369ae468fd64cfb513ff0392a25d895ca9/components/aws/sagemaker) / [ground_truth](https://github.com/kubeflow/pipelines/tree/079eea369ae468fd64cfb513ff0392a25d895ca9/components/aws/sagemaker/ground_truth) / [SageMaker - Ground Truth](https://raw.githubusercontent.com/kubeflow/pipelines/079eea369ae468fd64cfb513ff0392a25d895ca9/components/aws/sagemaker/ground_truth/component.yaml)
/ [aws](https://github.com/kubeflow/pipelines/tree/079eea369ae468fd64cfb513ff0392a25d895ca9/components/aws) / [sagemaker](https://github.com/kubeflow/pipelines/tree/079eea369ae468fd64cfb513ff0392a25d895ca9/components/aws/sagemaker) / [hyperparameter_tuning](https://github.com/kubeflow/pipelines/tree/079eea369ae468fd64cfb513ff0392a25d895ca9/components/aws/sagemaker/hyperparameter_tuning) / [SageMaker - Hyperparameter Tuning](https://raw.githubusercontent.com/kubeflow/pipelines/079eea369ae468fd64cfb513ff0392a25d895ca9/components/aws/sagemaker/hyperparameter_tuning/component.yaml)
/ [aws](https://github.com/kubeflow/pipelines/tree/079eea369ae468fd64cfb513ff0392a25d895ca9/components/aws) / [sagemaker](https://github.com/kubeflow/pipelines/tree/079eea369ae468fd64cfb513ff0392a25d895ca9/components/aws/sagemaker) / [model](https://github.com/kubeflow/pipelines/tree/079eea369ae468fd64cfb513ff0392a25d895ca9/components/aws/sagemaker/model) / [SageMaker - Create Model](https://raw.githubusercontent.com/kubeflow/pipelines/079eea369ae468fd64cfb513ff0392a25d895ca9/components/aws/sagemaker/model/component.yaml)
/ [aws](https://github.com/kubeflow/pipelines/tree/079eea369ae468fd64cfb513ff0392a25d895ca9/components/aws) / [sagemaker](https://github.com/kubeflow/pipelines/tree/079eea369ae468fd64cfb513ff0392a25d895ca9/components/aws/sagemaker) / [process](https://github.com/kubeflow/pipelines/tree/079eea369ae468fd64cfb513ff0392a25d895ca9/components/aws/sagemaker/process) / [SageMaker - Processing Job](https://raw.githubusercontent.com/kubeflow/pipelines/079eea369ae468fd64cfb513ff0392a25d895ca9/components/aws/sagemaker/process/component.yaml)
/ [aws](https://github.com/kubeflow/pipelines/tree/079eea369ae468fd64cfb513ff0392a25d895ca9/components/aws) / [sagemaker](https://github.com/kubeflow/pipelines/tree/079eea369ae468fd64cfb513ff0392a25d895ca9/components/aws/sagemaker) / [rlestimator](https://github.com/kubeflow/pipelines/tree/079eea369ae468fd64cfb513ff0392a25d895ca9/components/aws/sagemaker/rlestimator) / [SageMaker - RLEstimator Training Job](https://raw.githubusercontent.com/kubeflow/pipelines/079eea369ae468fd64cfb513ff0392a25d895ca9/components/aws/sagemaker/rlestimator/component.yaml)
/ [aws](https://github.com/kubeflow/pipelines/tree/079eea369ae468fd64cfb513ff0392a25d895ca9/components/aws) / [sagemaker](https://github.com/kubeflow/pipelines/tree/079eea369ae468fd64cfb513ff0392a25d895ca9/components/aws/sagemaker) / [simulation_job](https://github.com/kubeflow/pipelines/tree/079eea369ae468fd64cfb513ff0392a25d895ca9/components/aws/sagemaker/simulation_job) / [RoboMaker - Create Simulation Job](https://raw.githubusercontent.com/kubeflow/pipelines/079eea369ae468fd64cfb513ff0392a25d895ca9/components/aws/sagemaker/simulation_job/component.yaml)
/ [aws](https://github.com/kubeflow/pipelines/tree/079eea369ae468fd64cfb513ff0392a25d895ca9/components/aws) / [sagemaker](https://github.com/kubeflow/pipelines/tree/079eea369ae468fd64cfb513ff0392a25d895ca9/components/aws/sagemaker) / [simulation_job_batch](https://github.com/kubeflow/pipelines/tree/079eea369ae468fd64cfb513ff0392a25d895ca9/components/aws/sagemaker/simulation_job_batch) / [RoboMaker - Create Simulation Job Batch](https://raw.githubusercontent.com/kubeflow/pipelines/079eea369ae468fd64cfb513ff0392a25d895ca9/components/aws/sagemaker/simulation_job_batch/component.yaml)
/ [aws](https://github.com/kubeflow/pipelines/tree/079eea369ae468fd64cfb513ff0392a25d895ca9/components/aws) / [sagemaker](https://github.com/kubeflow/pipelines/tree/079eea369ae468fd64cfb513ff0392a25d895ca9/components/aws/sagemaker) / [train](https://github.com/kubeflow/pipelines/tree/079eea369ae468fd64cfb513ff0392a25d895ca9/components/aws/sagemaker/train) / [SageMaker - Training Job](https://raw.githubusercontent.com/kubeflow/pipelines/079eea369ae468fd64cfb513ff0392a25d895ca9/components/aws/sagemaker/train/component.yaml)
/ [aws](https://github.com/kubeflow/pipelines/tree/079eea369ae468fd64cfb513ff0392a25d895ca9/components/aws) / [sagemaker](https://github.com/kubeflow/pipelines/tree/079eea369ae468fd64cfb513ff0392a25d895ca9/components/aws/sagemaker) / [workteam](https://github.com/kubeflow/pipelines/tree/079eea369ae468fd64cfb513ff0392a25d895ca9/components/aws/sagemaker/workteam) / [SageMaker - Private Workforce](https://raw.githubusercontent.com/kubeflow/pipelines/079eea369ae468fd64cfb513ff0392a25d895ca9/components/aws/sagemaker/workteam/component.yaml)
/ [azure](https://github.com/kubeflow/pipelines/tree/210eb8afaae43000cf46a8aec6c17dd3eda3c08f/components/azure) / [azuredevops](https://github.com/kubeflow/pipelines/tree/210eb8afaae43000cf46a8aec6c17dd3eda3c08f/components/azure/azuredevops) / [queue-pipeline](https://github.com/kubeflow/pipelines/tree/210eb8afaae43000cf46a8aec6c17dd3eda3c08f/components/azure/azuredevops/queue-pipeline) / [Queue Azure Pipeline](https://raw.githubusercontent.com/kubeflow/pipelines/210eb8afaae43000cf46a8aec6c17dd3eda3c08f/components/azure/azuredevops/queue-pipeline/component.yaml)
/ [azure](https://github.com/kubeflow/pipelines/tree/ec721fe94dbcaa054b1057e5503e4f9823fdf2a5/components/azure) / [azureml](https://github.com/kubeflow/pipelines/tree/ec721fe94dbcaa054b1057e5503e4f9823fdf2a5/components/azure/azureml) / [aml-deploy-model](https://github.com/kubeflow/pipelines/tree/ec721fe94dbcaa054b1057e5503e4f9823fdf2a5/components/azure/azureml/aml-deploy-model) / [Azure ML Deploy Model](https://raw.githubusercontent.com/kubeflow/pipelines/ec721fe94dbcaa054b1057e5503e4f9823fdf2a5/components/azure/azureml/aml-deploy-model/component.yaml)
/ [azure](https://github.com/kubeflow/pipelines/tree/210eb8afaae43000cf46a8aec6c17dd3eda3c08f/components/azure) / [azureml](https://github.com/kubeflow/pipelines/tree/210eb8afaae43000cf46a8aec6c17dd3eda3c08f/components/azure/azureml) / [aml-register-model](https://github.com/kubeflow/pipelines/tree/210eb8afaae43000cf46a8aec6c17dd3eda3c08f/components/azure/azureml/aml-register-model) / [Azure ML Register Model](https://raw.githubusercontent.com/kubeflow/pipelines/210eb8afaae43000cf46a8aec6c17dd3eda3c08f/components/azure/azureml/aml-register-model/component.yaml)
/ [azure](https://github.com/kubeflow/pipelines/tree/e976d6d4696262e319ae971ffa645297cf258d80/components/azure) / [azuresynapse](https://github.com/kubeflow/pipelines/tree/e976d6d4696262e319ae971ffa645297cf258d80/components/azure/azuresynapse) / [runsparkjob](https://github.com/kubeflow/pipelines/tree/e976d6d4696262e319ae971ffa645297cf258d80/components/azure/azuresynapse/runsparkjob) / [Azure Synapse Run Spark Job](https://raw.githubusercontent.com/kubeflow/pipelines/e976d6d4696262e319ae971ffa645297cf258d80/components/azure/azuresynapse/runsparkjob/component.yaml)
/ [basics](https://github.com/kubeflow/pipelines/tree/0795597562e076437a21745e524b5c960b1edb68/components/basics) / [Calculate_hash](https://github.com/kubeflow/pipelines/tree/0795597562e076437a21745e524b5c960b1edb68/components/basics/Calculate_hash) / [Calculate data hash](https://raw.githubusercontent.com/kubeflow/pipelines/0795597562e076437a21745e524b5c960b1edb68/components/basics/Calculate_hash/component.yaml)
/ [CatBoost](https://github.com/kubeflow/pipelines/tree/0795597562e076437a21745e524b5c960b1edb68/components/CatBoost) / [convert_CatBoostModel_to_AppleCoreMLModel](https://github.com/kubeflow/pipelines/tree/0795597562e076437a21745e524b5c960b1edb68/components/CatBoost/convert_CatBoostModel_to_AppleCoreMLModel) / [Convert CatBoostModel to AppleCoreMLModel](https://raw.githubusercontent.com/kubeflow/pipelines/0795597562e076437a21745e524b5c960b1edb68/components/CatBoost/convert_CatBoostModel_to_AppleCoreMLModel/component.yaml)
/ [CatBoost](https://github.com/kubeflow/pipelines/tree/0795597562e076437a21745e524b5c960b1edb68/components/CatBoost) / [convert_CatBoostModel_to_ONNX](https://github.com/kubeflow/pipelines/tree/0795597562e076437a21745e524b5c960b1edb68/components/CatBoost/convert_CatBoostModel_to_ONNX) / [Convert CatBoostModel to ONNX](https://raw.githubusercontent.com/kubeflow/pipelines/0795597562e076437a21745e524b5c960b1edb68/components/CatBoost/convert_CatBoostModel_to_ONNX/component.yaml)
/ [CatBoost](https://github.com/kubeflow/pipelines/tree/0795597562e076437a21745e524b5c960b1edb68/components/CatBoost) / [Predict_class_probabilities](https://github.com/kubeflow/pipelines/tree/0795597562e076437a21745e524b5c960b1edb68/components/CatBoost/Predict_class_probabilities) / [from_CSV](https://github.com/kubeflow/pipelines/tree/0795597562e076437a21745e524b5c960b1edb68/components/CatBoost/Predict_class_probabilities/from_CSV) / [Catboost predict class probabilities](https://raw.githubusercontent.com/kubeflow/pipelines/0795597562e076437a21745e524b5c960b1edb68/components/CatBoost/Predict_class_probabilities/from_CSV/component.yaml)
/ [CatBoost](https://github.com/kubeflow/pipelines/tree/0795597562e076437a21745e524b5c960b1edb68/components/CatBoost) / [Predict_classes](https://github.com/kubeflow/pipelines/tree/0795597562e076437a21745e524b5c960b1edb68/components/CatBoost/Predict_classes) / [from_CSV](https://github.com/kubeflow/pipelines/tree/0795597562e076437a21745e524b5c960b1edb68/components/CatBoost/Predict_classes/from_CSV) / [Catboost predict classes](https://raw.githubusercontent.com/kubeflow/pipelines/0795597562e076437a21745e524b5c960b1edb68/components/CatBoost/Predict_classes/from_CSV/component.yaml)
/ [CatBoost](https://github.com/kubeflow/pipelines/tree/0795597562e076437a21745e524b5c960b1edb68/components/CatBoost) / [Predict_values](https://github.com/kubeflow/pipelines/tree/0795597562e076437a21745e524b5c960b1edb68/components/CatBoost/Predict_values) / [from_CSV](https://github.com/kubeflow/pipelines/tree/0795597562e076437a21745e524b5c960b1edb68/components/CatBoost/Predict_values/from_CSV) / [Catboost predict values](https://raw.githubusercontent.com/kubeflow/pipelines/0795597562e076437a21745e524b5c960b1edb68/components/CatBoost/Predict_values/from_CSV/component.yaml)
/ [CatBoost](https://github.com/kubeflow/pipelines/tree/0795597562e076437a21745e524b5c960b1edb68/components/CatBoost) / [Train_classifier](https://github.com/kubeflow/pipelines/tree/0795597562e076437a21745e524b5c960b1edb68/components/CatBoost/Train_classifier) / [from_CSV](https://github.com/kubeflow/pipelines/tree/0795597562e076437a21745e524b5c960b1edb68/components/CatBoost/Train_classifier/from_CSV) / [Catboost train classifier](https://raw.githubusercontent.com/kubeflow/pipelines/0795597562e076437a21745e524b5c960b1edb68/components/CatBoost/Train_classifier/from_CSV/component.yaml)
/ [CatBoost](https://github.com/kubeflow/pipelines/tree/0795597562e076437a21745e524b5c960b1edb68/components/CatBoost) / [Train_regression](https://github.com/kubeflow/pipelines/tree/0795597562e076437a21745e524b5c960b1edb68/components/CatBoost/Train_regression) / [from_CSV](https://github.com/kubeflow/pipelines/tree/0795597562e076437a21745e524b5c960b1edb68/components/CatBoost/Train_regression/from_CSV) / [Catboost train regression](https://raw.githubusercontent.com/kubeflow/pipelines/0795597562e076437a21745e524b5c960b1edb68/components/CatBoost/Train_regression/from_CSV/component.yaml)
/ [dataset_manipulation](https://github.com/kubeflow/pipelines/tree/0795597562e076437a21745e524b5c960b1edb68/components/dataset_manipulation) / [split_data_into_folds](https://github.com/kubeflow/pipelines/tree/0795597562e076437a21745e524b5c960b1edb68/components/dataset_manipulation/split_data_into_folds) / [in_CSV](https://github.com/kubeflow/pipelines/tree/0795597562e076437a21745e524b5c960b1edb68/components/dataset_manipulation/split_data_into_folds/in_CSV) / [Split table into folds](https://raw.githubusercontent.com/kubeflow/pipelines/0795597562e076437a21745e524b5c960b1edb68/components/dataset_manipulation/split_data_into_folds/in_CSV/component.yaml)
/ [datasets](https://github.com/kubeflow/pipelines/tree/0795597562e076437a21745e524b5c960b1edb68/components/datasets) / [Chicago_Taxi_Trips](https://github.com/kubeflow/pipelines/tree/0795597562e076437a21745e524b5c960b1edb68/components/datasets/Chicago_Taxi_Trips) / [Chicago Taxi Trips dataset](https://raw.githubusercontent.com/kubeflow/pipelines/0795597562e076437a21745e524b5c960b1edb68/components/datasets/Chicago_Taxi_Trips/component.yaml)
/ [deprecated](https://github.com/kubeflow/pipelines/tree/0795597562e076437a21745e524b5c960b1edb68/components/deprecated) / [dataflow](https://github.com/kubeflow/pipelines/tree/0795597562e076437a21745e524b5c960b1edb68/components/deprecated/dataflow) / [predict](https://github.com/kubeflow/pipelines/tree/0795597562e076437a21745e524b5c960b1edb68/components/deprecated/dataflow/predict) / [Predict using TF on Dataflow](https://raw.githubusercontent.com/kubeflow/pipelines/0795597562e076437a21745e524b5c960b1edb68/components/deprecated/dataflow/predict/component.yaml)
/ [deprecated](https://github.com/kubeflow/pipelines/tree/0795597562e076437a21745e524b5c960b1edb68/components/deprecated) / [dataflow](https://github.com/kubeflow/pipelines/tree/0795597562e076437a21745e524b5c960b1edb68/components/deprecated/dataflow) / [tfdv](https://github.com/kubeflow/pipelines/tree/0795597562e076437a21745e524b5c960b1edb68/components/deprecated/dataflow/tfdv) / [TFX - Data Validation](https://raw.githubusercontent.com/kubeflow/pipelines/0795597562e076437a21745e524b5c960b1edb68/components/deprecated/dataflow/tfdv/component.yaml)
/ [deprecated](https://github.com/kubeflow/pipelines/tree/0795597562e076437a21745e524b5c960b1edb68/components/deprecated) / [dataflow](https://github.com/kubeflow/pipelines/tree/0795597562e076437a21745e524b5c960b1edb68/components/deprecated/dataflow) / [tfma](https://github.com/kubeflow/pipelines/tree/0795597562e076437a21745e524b5c960b1edb68/components/deprecated/dataflow/tfma) / [TFX - Analyze model](https://raw.githubusercontent.com/kubeflow/pipelines/0795597562e076437a21745e524b5c960b1edb68/components/deprecated/dataflow/tfma/component.yaml)
/ [deprecated](https://github.com/kubeflow/pipelines/tree/0795597562e076437a21745e524b5c960b1edb68/components/deprecated) / [dataflow](https://github.com/kubeflow/pipelines/tree/0795597562e076437a21745e524b5c960b1edb68/components/deprecated/dataflow) / [tft](https://github.com/kubeflow/pipelines/tree/0795597562e076437a21745e524b5c960b1edb68/components/deprecated/dataflow/tft) / [Transform using TF on Dataflow](https://raw.githubusercontent.com/kubeflow/pipelines/0795597562e076437a21745e524b5c960b1edb68/components/deprecated/dataflow/tft/component.yaml)
/ [deprecated](https://github.com/kubeflow/pipelines/tree/0795597562e076437a21745e524b5c960b1edb68/components/deprecated) / [tfx](https://github.com/kubeflow/pipelines/tree/0795597562e076437a21745e524b5c960b1edb68/components/deprecated/tfx) / [Evaluator](https://github.com/kubeflow/pipelines/tree/0795597562e076437a21745e524b5c960b1edb68/components/deprecated/tfx/Evaluator) / [Evaluator](https://raw.githubusercontent.com/kubeflow/pipelines/0795597562e076437a21745e524b5c960b1edb68/components/deprecated/tfx/Evaluator/component.yaml)
/ [deprecated](https://github.com/kubeflow/pipelines/tree/0795597562e076437a21745e524b5c960b1edb68/components/deprecated) / [tfx](https://github.com/kubeflow/pipelines/tree/0795597562e076437a21745e524b5c960b1edb68/components/deprecated/tfx) / [Evaluator](https://github.com/kubeflow/pipelines/tree/0795597562e076437a21745e524b5c960b1edb68/components/deprecated/tfx/Evaluator) / [with_URI_IO](https://github.com/kubeflow/pipelines/tree/0795597562e076437a21745e524b5c960b1edb68/components/deprecated/tfx/Evaluator/with_URI_IO) / [Evaluator](https://raw.githubusercontent.com/kubeflow/pipelines/0795597562e076437a21745e524b5c960b1edb68/components/deprecated/tfx/Evaluator/with_URI_IO/component.yaml)
/ [deprecated](https://github.com/kubeflow/pipelines/tree/0795597562e076437a21745e524b5c960b1edb68/components/deprecated) / [tfx](https://github.com/kubeflow/pipelines/tree/0795597562e076437a21745e524b5c960b1edb68/components/deprecated/tfx) / [ExampleGen](https://github.com/kubeflow/pipelines/tree/0795597562e076437a21745e524b5c960b1edb68/components/deprecated/tfx/ExampleGen) / [BigQueryExampleGen](https://github.com/kubeflow/pipelines/tree/0795597562e076437a21745e524b5c960b1edb68/components/deprecated/tfx/ExampleGen/BigQueryExampleGen) / [BigQueryExampleGen](https://raw.githubusercontent.com/kubeflow/pipelines/0795597562e076437a21745e524b5c960b1edb68/components/deprecated/tfx/ExampleGen/BigQueryExampleGen/component.yaml)
/ [deprecated](https://github.com/kubeflow/pipelines/tree/0795597562e076437a21745e524b5c960b1edb68/components/deprecated) / [tfx](https://github.com/kubeflow/pipelines/tree/0795597562e076437a21745e524b5c960b1edb68/components/deprecated/tfx) / [ExampleGen](https://github.com/kubeflow/pipelines/tree/0795597562e076437a21745e524b5c960b1edb68/components/deprecated/tfx/ExampleGen) / [BigQueryExampleGen](https://github.com/kubeflow/pipelines/tree/0795597562e076437a21745e524b5c960b1edb68/components/deprecated/tfx/ExampleGen/BigQueryExampleGen) / [with_URI_IO](https://github.com/kubeflow/pipelines/tree/0795597562e076437a21745e524b5c960b1edb68/components/deprecated/tfx/ExampleGen/BigQueryExampleGen/with_URI_IO) / [BigQueryExampleGen](https://raw.githubusercontent.com/kubeflow/pipelines/0795597562e076437a21745e524b5c960b1edb68/components/deprecated/tfx/ExampleGen/BigQueryExampleGen/with_URI_IO/component.yaml)
/ [deprecated](https://github.com/kubeflow/pipelines/tree/0795597562e076437a21745e524b5c960b1edb68/components/deprecated) / [tfx](https://github.com/kubeflow/pipelines/tree/0795597562e076437a21745e524b5c960b1edb68/components/deprecated/tfx) / [ExampleGen](https://github.com/kubeflow/pipelines/tree/0795597562e076437a21745e524b5c960b1edb68/components/deprecated/tfx/ExampleGen) / [CsvExampleGen](https://github.com/kubeflow/pipelines/tree/0795597562e076437a21745e524b5c960b1edb68/components/deprecated/tfx/ExampleGen/CsvExampleGen) / [CsvExampleGen](https://raw.githubusercontent.com/kubeflow/pipelines/0795597562e076437a21745e524b5c960b1edb68/components/deprecated/tfx/ExampleGen/CsvExampleGen/component.yaml)
/ [deprecated](https://github.com/kubeflow/pipelines/tree/0795597562e076437a21745e524b5c960b1edb68/components/deprecated) / [tfx](https://github.com/kubeflow/pipelines/tree/0795597562e076437a21745e524b5c960b1edb68/components/deprecated/tfx) / [ExampleGen](https://github.com/kubeflow/pipelines/tree/0795597562e076437a21745e524b5c960b1edb68/components/deprecated/tfx/ExampleGen) / [CsvExampleGen](https://github.com/kubeflow/pipelines/tree/0795597562e076437a21745e524b5c960b1edb68/components/deprecated/tfx/ExampleGen/CsvExampleGen) / [with_URI_IO](https://github.com/kubeflow/pipelines/tree/0795597562e076437a21745e524b5c960b1edb68/components/deprecated/tfx/ExampleGen/CsvExampleGen/with_URI_IO) / [CsvExampleGen](https://raw.githubusercontent.com/kubeflow/pipelines/0795597562e076437a21745e524b5c960b1edb68/components/deprecated/tfx/ExampleGen/CsvExampleGen/with_URI_IO/component.yaml)
/ [deprecated](https://github.com/kubeflow/pipelines/tree/0795597562e076437a21745e524b5c960b1edb68/components/deprecated) / [tfx](https://github.com/kubeflow/pipelines/tree/0795597562e076437a21745e524b5c960b1edb68/components/deprecated/tfx) / [ExampleGen](https://github.com/kubeflow/pipelines/tree/0795597562e076437a21745e524b5c960b1edb68/components/deprecated/tfx/ExampleGen) / [ImportExampleGen](https://github.com/kubeflow/pipelines/tree/0795597562e076437a21745e524b5c960b1edb68/components/deprecated/tfx/ExampleGen/ImportExampleGen) / [ImportExampleGen](https://raw.githubusercontent.com/kubeflow/pipelines/0795597562e076437a21745e524b5c960b1edb68/components/deprecated/tfx/ExampleGen/ImportExampleGen/component.yaml)
/ [deprecated](https://github.com/kubeflow/pipelines/tree/0795597562e076437a21745e524b5c960b1edb68/components/deprecated) / [tfx](https://github.com/kubeflow/pipelines/tree/0795597562e076437a21745e524b5c960b1edb68/components/deprecated/tfx) / [ExampleGen](https://github.com/kubeflow/pipelines/tree/0795597562e076437a21745e524b5c960b1edb68/components/deprecated/tfx/ExampleGen) / [ImportExampleGen](https://github.com/kubeflow/pipelines/tree/0795597562e076437a21745e524b5c960b1edb68/components/deprecated/tfx/ExampleGen/ImportExampleGen) / [with_URI_IO](https://github.com/kubeflow/pipelines/tree/0795597562e076437a21745e524b5c960b1edb68/components/deprecated/tfx/ExampleGen/ImportExampleGen/with_URI_IO) / [ImportExampleGen](https://raw.githubusercontent.com/kubeflow/pipelines/0795597562e076437a21745e524b5c960b1edb68/components/deprecated/tfx/ExampleGen/ImportExampleGen/with_URI_IO/component.yaml)
/ [deprecated](https://github.com/kubeflow/pipelines/tree/0795597562e076437a21745e524b5c960b1edb68/components/deprecated) / [tfx](https://github.com/kubeflow/pipelines/tree/0795597562e076437a21745e524b5c960b1edb68/components/deprecated/tfx) / [ExampleValidator](https://github.com/kubeflow/pipelines/tree/0795597562e076437a21745e524b5c960b1edb68/components/deprecated/tfx/ExampleValidator) / [ExampleValidator](https://raw.githubusercontent.com/kubeflow/pipelines/0795597562e076437a21745e524b5c960b1edb68/components/deprecated/tfx/ExampleValidator/component.yaml)
/ [deprecated](https://github.com/kubeflow/pipelines/tree/0795597562e076437a21745e524b5c960b1edb68/components/deprecated) / [tfx](https://github.com/kubeflow/pipelines/tree/0795597562e076437a21745e524b5c960b1edb68/components/deprecated/tfx) / [ExampleValidator](https://github.com/kubeflow/pipelines/tree/0795597562e076437a21745e524b5c960b1edb68/components/deprecated/tfx/ExampleValidator) / [with_URI_IO](https://github.com/kubeflow/pipelines/tree/0795597562e076437a21745e524b5c960b1edb68/components/deprecated/tfx/ExampleValidator/with_URI_IO) / [ExampleValidator](https://raw.githubusercontent.com/kubeflow/pipelines/0795597562e076437a21745e524b5c960b1edb68/components/deprecated/tfx/ExampleValidator/with_URI_IO/component.yaml)
/ [deprecated](https://github.com/kubeflow/pipelines/tree/0795597562e076437a21745e524b5c960b1edb68/components/deprecated) / [tfx](https://github.com/kubeflow/pipelines/tree/0795597562e076437a21745e524b5c960b1edb68/components/deprecated/tfx) / [SchemaGen](https://github.com/kubeflow/pipelines/tree/0795597562e076437a21745e524b5c960b1edb68/components/deprecated/tfx/SchemaGen) / [SchemaGen](https://raw.githubusercontent.com/kubeflow/pipelines/0795597562e076437a21745e524b5c960b1edb68/components/deprecated/tfx/SchemaGen/component.yaml)
/ [deprecated](https://github.com/kubeflow/pipelines/tree/0795597562e076437a21745e524b5c960b1edb68/components/deprecated) / [tfx](https://github.com/kubeflow/pipelines/tree/0795597562e076437a21745e524b5c960b1edb68/components/deprecated/tfx) / [SchemaGen](https://github.com/kubeflow/pipelines/tree/0795597562e076437a21745e524b5c960b1edb68/components/deprecated/tfx/SchemaGen) / [with_URI_IO](https://github.com/kubeflow/pipelines/tree/0795597562e076437a21745e524b5c960b1edb68/components/deprecated/tfx/SchemaGen/with_URI_IO) / [SchemaGen](https://raw.githubusercontent.com/kubeflow/pipelines/0795597562e076437a21745e524b5c960b1edb68/components/deprecated/tfx/SchemaGen/with_URI_IO/component.yaml)
/ [deprecated](https://github.com/kubeflow/pipelines/tree/0795597562e076437a21745e524b5c960b1edb68/components/deprecated) / [tfx](https://github.com/kubeflow/pipelines/tree/0795597562e076437a21745e524b5c960b1edb68/components/deprecated/tfx) / [StatisticsGen](https://github.com/kubeflow/pipelines/tree/0795597562e076437a21745e524b5c960b1edb68/components/deprecated/tfx/StatisticsGen) / [StatisticsGen](https://raw.githubusercontent.com/kubeflow/pipelines/0795597562e076437a21745e524b5c960b1edb68/components/deprecated/tfx/StatisticsGen/component.yaml)
/ [deprecated](https://github.com/kubeflow/pipelines/tree/0795597562e076437a21745e524b5c960b1edb68/components/deprecated) / [tfx](https://github.com/kubeflow/pipelines/tree/0795597562e076437a21745e524b5c960b1edb68/components/deprecated/tfx) / [StatisticsGen](https://github.com/kubeflow/pipelines/tree/0795597562e076437a21745e524b5c960b1edb68/components/deprecated/tfx/StatisticsGen) / [with_URI_IO](https://github.com/kubeflow/pipelines/tree/0795597562e076437a21745e524b5c960b1edb68/components/deprecated/tfx/StatisticsGen/with_URI_IO) / [StatisticsGen](https://raw.githubusercontent.com/kubeflow/pipelines/0795597562e076437a21745e524b5c960b1edb68/components/deprecated/tfx/StatisticsGen/with_URI_IO/component.yaml)
/ [deprecated](https://github.com/kubeflow/pipelines/tree/0795597562e076437a21745e524b5c960b1edb68/components/deprecated) / [tfx](https://github.com/kubeflow/pipelines/tree/0795597562e076437a21745e524b5c960b1edb68/components/deprecated/tfx) / [Trainer](https://github.com/kubeflow/pipelines/tree/0795597562e076437a21745e524b5c960b1edb68/components/deprecated/tfx/Trainer) / [Trainer](https://raw.githubusercontent.com/kubeflow/pipelines/0795597562e076437a21745e524b5c960b1edb68/components/deprecated/tfx/Trainer/component.yaml)
/ [deprecated](https://github.com/kubeflow/pipelines/tree/0795597562e076437a21745e524b5c960b1edb68/components/deprecated) / [tfx](https://github.com/kubeflow/pipelines/tree/0795597562e076437a21745e524b5c960b1edb68/components/deprecated/tfx) / [Trainer](https://github.com/kubeflow/pipelines/tree/0795597562e076437a21745e524b5c960b1edb68/components/deprecated/tfx/Trainer) / [with_URI_IO](https://github.com/kubeflow/pipelines/tree/0795597562e076437a21745e524b5c960b1edb68/components/deprecated/tfx/Trainer/with_URI_IO) / [Trainer](https://raw.githubusercontent.com/kubeflow/pipelines/0795597562e076437a21745e524b5c960b1edb68/components/deprecated/tfx/Trainer/with_URI_IO/component.yaml)
/ [deprecated](https://github.com/kubeflow/pipelines/tree/0795597562e076437a21745e524b5c960b1edb68/components/deprecated) / [tfx](https://github.com/kubeflow/pipelines/tree/0795597562e076437a21745e524b5c960b1edb68/components/deprecated/tfx) / [Transform](https://github.com/kubeflow/pipelines/tree/0795597562e076437a21745e524b5c960b1edb68/components/deprecated/tfx/Transform) / [Transform](https://raw.githubusercontent.com/kubeflow/pipelines/0795597562e076437a21745e524b5c960b1edb68/components/deprecated/tfx/Transform/component.yaml)
/ [deprecated](https://github.com/kubeflow/pipelines/tree/0795597562e076437a21745e524b5c960b1edb68/components/deprecated) / [tfx](https://github.com/kubeflow/pipelines/tree/0795597562e076437a21745e524b5c960b1edb68/components/deprecated/tfx) / [Transform](https://github.com/kubeflow/pipelines/tree/0795597562e076437a21745e524b5c960b1edb68/components/deprecated/tfx/Transform) / [with_URI_IO](https://github.com/kubeflow/pipelines/tree/0795597562e076437a21745e524b5c960b1edb68/components/deprecated/tfx/Transform/with_URI_IO) / [Transform](https://raw.githubusercontent.com/kubeflow/pipelines/0795597562e076437a21745e524b5c960b1edb68/components/deprecated/tfx/Transform/with_URI_IO/component.yaml)
/ [diagnostics](https://github.com/kubeflow/pipelines/tree/0795597562e076437a21745e524b5c960b1edb68/components/diagnostics) / [diagnose_me](https://github.com/kubeflow/pipelines/tree/0795597562e076437a21745e524b5c960b1edb68/components/diagnostics/diagnose_me) / [Run diagnose me](https://raw.githubusercontent.com/kubeflow/pipelines/0795597562e076437a21745e524b5c960b1edb68/components/diagnostics/diagnose_me/component.yaml)
/ [filesystem](https://github.com/kubeflow/pipelines/tree/0795597562e076437a21745e524b5c960b1edb68/components/filesystem) / [get_file](https://github.com/kubeflow/pipelines/tree/0795597562e076437a21745e524b5c960b1edb68/components/filesystem/get_file) / [Get file](https://raw.githubusercontent.com/kubeflow/pipelines/0795597562e076437a21745e524b5c960b1edb68/components/filesystem/get_file/component.yaml)
/ [filesystem](https://github.com/kubeflow/pipelines/tree/0795597562e076437a21745e524b5c960b1edb68/components/filesystem) / [get_subdirectory](https://github.com/kubeflow/pipelines/tree/0795597562e076437a21745e524b5c960b1edb68/components/filesystem/get_subdirectory) / [Get subdirectory](https://raw.githubusercontent.com/kubeflow/pipelines/0795597562e076437a21745e524b5c960b1edb68/components/filesystem/get_subdirectory/component.yaml)
/ [filesystem](https://github.com/kubeflow/pipelines/tree/0795597562e076437a21745e524b5c960b1edb68/components/filesystem) / [list_items](https://github.com/kubeflow/pipelines/tree/0795597562e076437a21745e524b5c960b1edb68/components/filesystem/list_items) / [List items](https://raw.githubusercontent.com/kubeflow/pipelines/0795597562e076437a21745e524b5c960b1edb68/components/filesystem/list_items/component.yaml)
/ [gcp](https://github.com/kubeflow/pipelines/tree/c653c300508eea2da13e8eb464fd43028171cc4a/components/gcp) / [automl](https://github.com/kubeflow/pipelines/tree/c653c300508eea2da13e8eb464fd43028171cc4a/components/gcp/automl) / [create_dataset_for_tables](https://github.com/kubeflow/pipelines/tree/c653c300508eea2da13e8eb464fd43028171cc4a/components/gcp/automl/create_dataset_for_tables) / [Automl create dataset for tables](https://raw.githubusercontent.com/kubeflow/pipelines/c653c300508eea2da13e8eb464fd43028171cc4a/components/gcp/automl/create_dataset_for_tables/component.yaml)
/ [gcp](https://github.com/kubeflow/pipelines/tree/0795597562e076437a21745e524b5c960b1edb68/components/gcp) / [automl](https://github.com/kubeflow/pipelines/tree/0795597562e076437a21745e524b5c960b1edb68/components/gcp/automl) / [create_model_for_tables](https://github.com/kubeflow/pipelines/tree/0795597562e076437a21745e524b5c960b1edb68/components/gcp/automl/create_model_for_tables) / [Automl create model for tables](https://raw.githubusercontent.com/kubeflow/pipelines/0795597562e076437a21745e524b5c960b1edb68/components/gcp/automl/create_model_for_tables/component.yaml)
/ [gcp](https://github.com/kubeflow/pipelines/tree/0795597562e076437a21745e524b5c960b1edb68/components/gcp) / [automl](https://github.com/kubeflow/pipelines/tree/0795597562e076437a21745e524b5c960b1edb68/components/gcp/automl) / [deploy_model](https://github.com/kubeflow/pipelines/tree/0795597562e076437a21745e524b5c960b1edb68/components/gcp/automl/deploy_model) / [Automl deploy model](https://raw.githubusercontent.com/kubeflow/pipelines/0795597562e076437a21745e524b5c960b1edb68/components/gcp/automl/deploy_model/component.yaml)
/ [gcp](https://github.com/kubeflow/pipelines/tree/0795597562e076437a21745e524b5c960b1edb68/components/gcp) / [automl](https://github.com/kubeflow/pipelines/tree/0795597562e076437a21745e524b5c960b1edb68/components/gcp/automl) / [export_data_to_gcs](https://github.com/kubeflow/pipelines/tree/0795597562e076437a21745e524b5c960b1edb68/components/gcp/automl/export_data_to_gcs) / [Automl export data to gcs](https://raw.githubusercontent.com/kubeflow/pipelines/0795597562e076437a21745e524b5c960b1edb68/components/gcp/automl/export_data_to_gcs/component.yaml)
/ [gcp](https://github.com/kubeflow/pipelines/tree/0795597562e076437a21745e524b5c960b1edb68/components/gcp) / [automl](https://github.com/kubeflow/pipelines/tree/0795597562e076437a21745e524b5c960b1edb68/components/gcp/automl) / [export_model_to_gcs](https://github.com/kubeflow/pipelines/tree/0795597562e076437a21745e524b5c960b1edb68/components/gcp/automl/export_model_to_gcs) / [Automl export model to gcs](https://raw.githubusercontent.com/kubeflow/pipelines/0795597562e076437a21745e524b5c960b1edb68/components/gcp/automl/export_model_to_gcs/component.yaml)
/ [gcp](https://github.com/kubeflow/pipelines/tree/0795597562e076437a21745e524b5c960b1edb68/components/gcp) / [automl](https://github.com/kubeflow/pipelines/tree/0795597562e076437a21745e524b5c960b1edb68/components/gcp/automl) / [import_data_from_bigquery](https://github.com/kubeflow/pipelines/tree/0795597562e076437a21745e524b5c960b1edb68/components/gcp/automl/import_data_from_bigquery) / [Automl import data from bigquery](https://raw.githubusercontent.com/kubeflow/pipelines/0795597562e076437a21745e524b5c960b1edb68/components/gcp/automl/import_data_from_bigquery/component.yaml)
/ [gcp](https://github.com/kubeflow/pipelines/tree/0795597562e076437a21745e524b5c960b1edb68/components/gcp) / [automl](https://github.com/kubeflow/pipelines/tree/0795597562e076437a21745e524b5c960b1edb68/components/gcp/automl) / [import_data_from_gcs](https://github.com/kubeflow/pipelines/tree/0795597562e076437a21745e524b5c960b1edb68/components/gcp/automl/import_data_from_gcs) / [Automl import data from gcs](https://raw.githubusercontent.com/kubeflow/pipelines/0795597562e076437a21745e524b5c960b1edb68/components/gcp/automl/import_data_from_gcs/component.yaml)
/ [gcp](https://github.com/kubeflow/pipelines/tree/0795597562e076437a21745e524b5c960b1edb68/components/gcp) / [automl](https://github.com/kubeflow/pipelines/tree/0795597562e076437a21745e524b5c960b1edb68/components/gcp/automl) / [prediction_service_batch_predict](https://github.com/kubeflow/pipelines/tree/0795597562e076437a21745e524b5c960b1edb68/components/gcp/automl/prediction_service_batch_predict) / [Automl prediction service batch predict](https://raw.githubusercontent.com/kubeflow/pipelines/0795597562e076437a21745e524b5c960b1edb68/components/gcp/automl/prediction_service_batch_predict/component.yaml)
/ [gcp](https://github.com/kubeflow/pipelines/tree/0795597562e076437a21745e524b5c960b1edb68/components/gcp) / [automl](https://github.com/kubeflow/pipelines/tree/0795597562e076437a21745e524b5c960b1edb68/components/gcp/automl) / [split_dataset_table_column_names](https://github.com/kubeflow/pipelines/tree/0795597562e076437a21745e524b5c960b1edb68/components/gcp/automl/split_dataset_table_column_names) / [Automl split dataset table column names](https://raw.githubusercontent.com/kubeflow/pipelines/0795597562e076437a21745e524b5c960b1edb68/components/gcp/automl/split_dataset_table_column_names/component.yaml)
/ [gcp](https://github.com/kubeflow/pipelines/tree/8b3d741c6ef9f80190c962d4640690ea723b71e9/components/gcp) / [bigquery](https://github.com/kubeflow/pipelines/tree/8b3d741c6ef9f80190c962d4640690ea723b71e9/components/gcp/bigquery) / [query](https://github.com/kubeflow/pipelines/tree/8b3d741c6ef9f80190c962d4640690ea723b71e9/components/gcp/bigquery/query) / [to_CSV](https://github.com/kubeflow/pipelines/tree/8b3d741c6ef9f80190c962d4640690ea723b71e9/components/gcp/bigquery/query/to_CSV) / [Bigquery - Query](https://raw.githubusercontent.com/kubeflow/pipelines/1.7.0-rc.3/components/gcp/bigquery/query/to_CSV/component.yaml)
/ [gcp](https://github.com/kubeflow/pipelines/tree/8b3d741c6ef9f80190c962d4640690ea723b71e9/components/gcp) / [bigquery](https://github.com/kubeflow/pipelines/tree/8b3d741c6ef9f80190c962d4640690ea723b71e9/components/gcp/bigquery) / [query](https://github.com/kubeflow/pipelines/tree/8b3d741c6ef9f80190c962d4640690ea723b71e9/components/gcp/bigquery/query) / [to_gcs](https://github.com/kubeflow/pipelines/tree/8b3d741c6ef9f80190c962d4640690ea723b71e9/components/gcp/bigquery/query/to_gcs) / [Bigquery - Query](https://raw.githubusercontent.com/kubeflow/pipelines/1.7.0-rc.3/components/gcp/bigquery/query/to_gcs/component.yaml)
/ [gcp](https://github.com/kubeflow/pipelines/tree/8b3d741c6ef9f80190c962d4640690ea723b71e9/components/gcp) / [bigquery](https://github.com/kubeflow/pipelines/tree/8b3d741c6ef9f80190c962d4640690ea723b71e9/components/gcp/bigquery) / [query](https://github.com/kubeflow/pipelines/tree/8b3d741c6ef9f80190c962d4640690ea723b71e9/components/gcp/bigquery/query) / [to_table](https://github.com/kubeflow/pipelines/tree/8b3d741c6ef9f80190c962d4640690ea723b71e9/components/gcp/bigquery/query/to_table) / [Bigquery - Query](https://raw.githubusercontent.com/kubeflow/pipelines/1.7.0-rc.3/components/gcp/bigquery/query/to_table/component.yaml)
/ [gcp](https://github.com/kubeflow/pipelines/tree/8b3d741c6ef9f80190c962d4640690ea723b71e9/components/gcp) / [dataflow](https://github.com/kubeflow/pipelines/tree/8b3d741c6ef9f80190c962d4640690ea723b71e9/components/gcp/dataflow) / [launch_python](https://github.com/kubeflow/pipelines/tree/8b3d741c6ef9f80190c962d4640690ea723b71e9/components/gcp/dataflow/launch_python) / [Launch Python](https://raw.githubusercontent.com/kubeflow/pipelines/1.7.0-rc.3/components/gcp/dataflow/launch_python/component.yaml)
/ [gcp](https://github.com/kubeflow/pipelines/tree/8b3d741c6ef9f80190c962d4640690ea723b71e9/components/gcp) / [dataflow](https://github.com/kubeflow/pipelines/tree/8b3d741c6ef9f80190c962d4640690ea723b71e9/components/gcp/dataflow) / [launch_template](https://github.com/kubeflow/pipelines/tree/8b3d741c6ef9f80190c962d4640690ea723b71e9/components/gcp/dataflow/launch_template) / [Launch Dataflow Template](https://raw.githubusercontent.com/kubeflow/pipelines/1.7.0-rc.3/components/gcp/dataflow/launch_template/component.yaml)
/ [gcp](https://github.com/kubeflow/pipelines/tree/8b3d741c6ef9f80190c962d4640690ea723b71e9/components/gcp) / [dataproc](https://github.com/kubeflow/pipelines/tree/8b3d741c6ef9f80190c962d4640690ea723b71e9/components/gcp/dataproc) / [create_cluster](https://github.com/kubeflow/pipelines/tree/8b3d741c6ef9f80190c962d4640690ea723b71e9/components/gcp/dataproc/create_cluster) / [dataproc_create_cluster](https://raw.githubusercontent.com/kubeflow/pipelines/1.7.0-rc.3/components/gcp/dataproc/create_cluster/component.yaml)
/ [gcp](https://github.com/kubeflow/pipelines/tree/8b3d741c6ef9f80190c962d4640690ea723b71e9/components/gcp) / [dataproc](https://github.com/kubeflow/pipelines/tree/8b3d741c6ef9f80190c962d4640690ea723b71e9/components/gcp/dataproc) / [delete_cluster](https://github.com/kubeflow/pipelines/tree/8b3d741c6ef9f80190c962d4640690ea723b71e9/components/gcp/dataproc/delete_cluster) / [dataproc_delete_cluster](https://raw.githubusercontent.com/kubeflow/pipelines/1.7.0-rc.3/components/gcp/dataproc/delete_cluster/component.yaml)
/ [gcp](https://github.com/kubeflow/pipelines/tree/8b3d741c6ef9f80190c962d4640690ea723b71e9/components/gcp) / [dataproc](https://github.com/kubeflow/pipelines/tree/8b3d741c6ef9f80190c962d4640690ea723b71e9/components/gcp/dataproc) / [submit_hadoop_job](https://github.com/kubeflow/pipelines/tree/8b3d741c6ef9f80190c962d4640690ea723b71e9/components/gcp/dataproc/submit_hadoop_job) / [dataproc_submit_hadoop_job](https://raw.githubusercontent.com/kubeflow/pipelines/1.7.0-rc.3/components/gcp/dataproc/submit_hadoop_job/component.yaml)
/ [gcp](https://github.com/kubeflow/pipelines/tree/8b3d741c6ef9f80190c962d4640690ea723b71e9/components/gcp) / [dataproc](https://github.com/kubeflow/pipelines/tree/8b3d741c6ef9f80190c962d4640690ea723b71e9/components/gcp/dataproc) / [submit_hive_job](https://github.com/kubeflow/pipelines/tree/8b3d741c6ef9f80190c962d4640690ea723b71e9/components/gcp/dataproc/submit_hive_job) / [dataproc_submit_hive_job](https://raw.githubusercontent.com/kubeflow/pipelines/1.7.0-rc.3/components/gcp/dataproc/submit_hive_job/component.yaml)
/ [gcp](https://github.com/kubeflow/pipelines/tree/8b3d741c6ef9f80190c962d4640690ea723b71e9/components/gcp) / [dataproc](https://github.com/kubeflow/pipelines/tree/8b3d741c6ef9f80190c962d4640690ea723b71e9/components/gcp/dataproc) / [submit_pig_job](https://github.com/kubeflow/pipelines/tree/8b3d741c6ef9f80190c962d4640690ea723b71e9/components/gcp/dataproc/submit_pig_job) / [dataproc_submit_pig_job](https://raw.githubusercontent.com/kubeflow/pipelines/1.7.0-rc.3/components/gcp/dataproc/submit_pig_job/component.yaml)
/ [gcp](https://github.com/kubeflow/pipelines/tree/8b3d741c6ef9f80190c962d4640690ea723b71e9/components/gcp) / [dataproc](https://github.com/kubeflow/pipelines/tree/8b3d741c6ef9f80190c962d4640690ea723b71e9/components/gcp/dataproc) / [submit_pyspark_job](https://github.com/kubeflow/pipelines/tree/8b3d741c6ef9f80190c962d4640690ea723b71e9/components/gcp/dataproc/submit_pyspark_job) / [dataproc_submit_pyspark_job](https://raw.githubusercontent.com/kubeflow/pipelines/1.7.0-rc.3/components/gcp/dataproc/submit_pyspark_job/component.yaml)
/ [gcp](https://github.com/kubeflow/pipelines/tree/8b3d741c6ef9f80190c962d4640690ea723b71e9/components/gcp) / [dataproc](https://github.com/kubeflow/pipelines/tree/8b3d741c6ef9f80190c962d4640690ea723b71e9/components/gcp/dataproc) / [submit_spark_job](https://github.com/kubeflow/pipelines/tree/8b3d741c6ef9f80190c962d4640690ea723b71e9/components/gcp/dataproc/submit_spark_job) / [dataproc_submit_spark_job](https://raw.githubusercontent.com/kubeflow/pipelines/1.7.0-rc.3/components/gcp/dataproc/submit_spark_job/component.yaml)
/ [gcp](https://github.com/kubeflow/pipelines/tree/8b3d741c6ef9f80190c962d4640690ea723b71e9/components/gcp) / [dataproc](https://github.com/kubeflow/pipelines/tree/8b3d741c6ef9f80190c962d4640690ea723b71e9/components/gcp/dataproc) / [submit_sparksql_job](https://github.com/kubeflow/pipelines/tree/8b3d741c6ef9f80190c962d4640690ea723b71e9/components/gcp/dataproc/submit_sparksql_job) / [dataproc_submit_sparksql_job](https://raw.githubusercontent.com/kubeflow/pipelines/1.7.0-rc.3/components/gcp/dataproc/submit_sparksql_job/component.yaml)
/ [gcp](https://github.com/kubeflow/pipelines/tree/8b3d741c6ef9f80190c962d4640690ea723b71e9/components/gcp) / [ml_engine](https://github.com/kubeflow/pipelines/tree/8b3d741c6ef9f80190c962d4640690ea723b71e9/components/gcp/ml_engine) / [batch_predict](https://github.com/kubeflow/pipelines/tree/8b3d741c6ef9f80190c962d4640690ea723b71e9/components/gcp/ml_engine/batch_predict) / [Batch predict against a model with Cloud ML Engine](https://raw.githubusercontent.com/kubeflow/pipelines/1.7.0-rc.3/components/gcp/ml_engine/batch_predict/component.yaml)
/ [gcp](https://github.com/kubeflow/pipelines/tree/8b3d741c6ef9f80190c962d4640690ea723b71e9/components/gcp) / [ml_engine](https://github.com/kubeflow/pipelines/tree/8b3d741c6ef9f80190c962d4640690ea723b71e9/components/gcp/ml_engine) / [deploy](https://github.com/kubeflow/pipelines/tree/8b3d741c6ef9f80190c962d4640690ea723b71e9/components/gcp/ml_engine/deploy) / [Deploying a trained model to Cloud Machine Learning Engine](https://raw.githubusercontent.com/kubeflow/pipelines/1.7.0-rc.3/components/gcp/ml_engine/deploy/component.yaml)
/ [gcp](https://github.com/kubeflow/pipelines/tree/8b3d741c6ef9f80190c962d4640690ea723b71e9/components/gcp) / [ml_engine](https://github.com/kubeflow/pipelines/tree/8b3d741c6ef9f80190c962d4640690ea723b71e9/components/gcp/ml_engine) / [train](https://github.com/kubeflow/pipelines/tree/8b3d741c6ef9f80190c962d4640690ea723b71e9/components/gcp/ml_engine/train) / [Submitting a Cloud ML training job as a pipeline step](https://raw.githubusercontent.com/kubeflow/pipelines/1.7.0-rc.3/components/gcp/ml_engine/train/component.yaml)
/ [git](https://github.com/kubeflow/pipelines/tree/0795597562e076437a21745e524b5c960b1edb68/components/git) / [clone](https://github.com/kubeflow/pipelines/tree/0795597562e076437a21745e524b5c960b1edb68/components/git/clone) / [Git clone](https://raw.githubusercontent.com/kubeflow/pipelines/0795597562e076437a21745e524b5c960b1edb68/components/git/clone/component.yaml)
/ [google-cloud](https://github.com/kubeflow/pipelines/tree/0795597562e076437a21745e524b5c960b1edb68/components/google-cloud) / [AutoML](https://github.com/kubeflow/pipelines/tree/0795597562e076437a21745e524b5c960b1edb68/components/google-cloud/AutoML) / [Tables](https://github.com/kubeflow/pipelines/tree/0795597562e076437a21745e524b5c960b1edb68/components/google-cloud/AutoML/Tables) / [Create_dataset](https://github.com/kubeflow/pipelines/tree/0795597562e076437a21745e524b5c960b1edb68/components/google-cloud/AutoML/Tables/Create_dataset) / [from_CSV](https://github.com/kubeflow/pipelines/tree/0795597562e076437a21745e524b5c960b1edb68/components/google-cloud/AutoML/Tables/Create_dataset/from_CSV) / [Automl create tables dataset from csv](https://raw.githubusercontent.com/kubeflow/pipelines/0795597562e076437a21745e524b5c960b1edb68/components/google-cloud/AutoML/Tables/Create_dataset/from_CSV/component.yaml)
/ [google-cloud](https://github.com/kubeflow/pipelines/tree/b601832157fec7ea914ce5bc063559a1411cc5e1/components/google-cloud) / [Optimizer](https://github.com/kubeflow/pipelines/tree/b601832157fec7ea914ce5bc063559a1411cc5e1/components/google-cloud/Optimizer) / [Add_measurement_for_trial](https://github.com/kubeflow/pipelines/tree/b601832157fec7ea914ce5bc063559a1411cc5e1/components/google-cloud/Optimizer/Add_measurement_for_trial) / [Add measurement for trial in gcp ai platform optimizer](https://raw.githubusercontent.com/kubeflow/pipelines/b601832157fec7ea914ce5bc063559a1411cc5e1/components/google-cloud/Optimizer/Add_measurement_for_trial/component.yaml)
/ [google-cloud](https://github.com/kubeflow/pipelines/tree/b601832157fec7ea914ce5bc063559a1411cc5e1/components/google-cloud) / [Optimizer](https://github.com/kubeflow/pipelines/tree/b601832157fec7ea914ce5bc063559a1411cc5e1/components/google-cloud/Optimizer) / [Create_study](https://github.com/kubeflow/pipelines/tree/b601832157fec7ea914ce5bc063559a1411cc5e1/components/google-cloud/Optimizer/Create_study) / [Create study in gcp ai platform optimizer](https://raw.githubusercontent.com/kubeflow/pipelines/b601832157fec7ea914ce5bc063559a1411cc5e1/components/google-cloud/Optimizer/Create_study/component.yaml)
/ [google-cloud](https://github.com/kubeflow/pipelines/tree/f30aa0975d246e55ee3619ca0d96bd4fdbb27f58/components/google-cloud) / [Optimizer](https://github.com/kubeflow/pipelines/tree/f30aa0975d246e55ee3619ca0d96bd4fdbb27f58/components/google-cloud/Optimizer) / [Suggest_parameter_sets_based_on_measurements](https://github.com/kubeflow/pipelines/tree/f30aa0975d246e55ee3619ca0d96bd4fdbb27f58/components/google-cloud/Optimizer/Suggest_parameter_sets_based_on_measurements) / [Suggest parameter sets from measurements using gcp ai platform optimizer](https://raw.githubusercontent.com/kubeflow/pipelines/f30aa0975d246e55ee3619ca0d96bd4fdbb27f58/components/google-cloud/Optimizer/Suggest_parameter_sets_based_on_measurements/component.yaml)
/ [google-cloud](https://github.com/kubeflow/pipelines/tree/b601832157fec7ea914ce5bc063559a1411cc5e1/components/google-cloud) / [Optimizer](https://github.com/kubeflow/pipelines/tree/b601832157fec7ea914ce5bc063559a1411cc5e1/components/google-cloud/Optimizer) / [Suggest_trials](https://github.com/kubeflow/pipelines/tree/b601832157fec7ea914ce5bc063559a1411cc5e1/components/google-cloud/Optimizer/Suggest_trials) / [Suggest trials in gcp ai platform optimizer](https://raw.githubusercontent.com/kubeflow/pipelines/b601832157fec7ea914ce5bc063559a1411cc5e1/components/google-cloud/Optimizer/Suggest_trials/component.yaml)
/ [google-cloud](https://github.com/kubeflow/pipelines/tree/0795597562e076437a21745e524b5c960b1edb68/components/google-cloud) / [storage](https://github.com/kubeflow/pipelines/tree/0795597562e076437a21745e524b5c960b1edb68/components/google-cloud/storage) / [download](https://github.com/kubeflow/pipelines/tree/0795597562e076437a21745e524b5c960b1edb68/components/google-cloud/storage/download) / [Download from GCS](https://raw.githubusercontent.com/kubeflow/pipelines/0795597562e076437a21745e524b5c960b1edb68/components/google-cloud/storage/download/component.yaml)
/ [google-cloud](https://github.com/kubeflow/pipelines/tree/0795597562e076437a21745e524b5c960b1edb68/components/google-cloud) / [storage](https://github.com/kubeflow/pipelines/tree/0795597562e076437a21745e524b5c960b1edb68/components/google-cloud/storage) / [download_blob](https://github.com/kubeflow/pipelines/tree/0795597562e076437a21745e524b5c960b1edb68/components/google-cloud/storage/download_blob) / [Download from GCS](https://raw.githubusercontent.com/kubeflow/pipelines/0795597562e076437a21745e524b5c960b1edb68/components/google-cloud/storage/download_blob/component.yaml)
/ [google-cloud](https://github.com/kubeflow/pipelines/tree/0795597562e076437a21745e524b5c960b1edb68/components/google-cloud) / [storage](https://github.com/kubeflow/pipelines/tree/0795597562e076437a21745e524b5c960b1edb68/components/google-cloud/storage) / [download_dir](https://github.com/kubeflow/pipelines/tree/0795597562e076437a21745e524b5c960b1edb68/components/google-cloud/storage/download_dir) / [Download from GCS](https://raw.githubusercontent.com/kubeflow/pipelines/0795597562e076437a21745e524b5c960b1edb68/components/google-cloud/storage/download_dir/component.yaml)
/ [google-cloud](https://github.com/kubeflow/pipelines/tree/0795597562e076437a21745e524b5c960b1edb68/components/google-cloud) / [storage](https://github.com/kubeflow/pipelines/tree/0795597562e076437a21745e524b5c960b1edb68/components/google-cloud/storage) / [list](https://github.com/kubeflow/pipelines/tree/0795597562e076437a21745e524b5c960b1edb68/components/google-cloud/storage/list) / [List blobs](https://raw.githubusercontent.com/kubeflow/pipelines/0795597562e076437a21745e524b5c960b1edb68/components/google-cloud/storage/list/component.yaml)
/ [google-cloud](https://github.com/kubeflow/pipelines/tree/0795597562e076437a21745e524b5c960b1edb68/components/google-cloud) / [storage](https://github.com/kubeflow/pipelines/tree/0795597562e076437a21745e524b5c960b1edb68/components/google-cloud/storage) / [upload_to_explicit_uri](https://github.com/kubeflow/pipelines/tree/0795597562e076437a21745e524b5c960b1edb68/components/google-cloud/storage/upload_to_explicit_uri) / [Upload to GCS](https://raw.githubusercontent.com/kubeflow/pipelines/0795597562e076437a21745e524b5c960b1edb68/components/google-cloud/storage/upload_to_explicit_uri/component.yaml)
/ [google-cloud](https://github.com/kubeflow/pipelines/tree/0795597562e076437a21745e524b5c960b1edb68/components/google-cloud) / [storage](https://github.com/kubeflow/pipelines/tree/0795597562e076437a21745e524b5c960b1edb68/components/google-cloud/storage) / [upload_to_unique_uri](https://github.com/kubeflow/pipelines/tree/0795597562e076437a21745e524b5c960b1edb68/components/google-cloud/storage/upload_to_unique_uri) / [Upload to GCS](https://raw.githubusercontent.com/kubeflow/pipelines/0795597562e076437a21745e524b5c960b1edb68/components/google-cloud/storage/upload_to_unique_uri/component.yaml)
/ [ibm-components](https://github.com/kubeflow/pipelines/tree/0795597562e076437a21745e524b5c960b1edb68/components/ibm-components) / [commons](https://github.com/kubeflow/pipelines/tree/0795597562e076437a21745e524b5c960b1edb68/components/ibm-components/commons) / [config](https://github.com/kubeflow/pipelines/tree/0795597562e076437a21745e524b5c960b1edb68/components/ibm-components/commons/config) / [Create Secret - Kubernetes Cluster](https://raw.githubusercontent.com/kubeflow/pipelines/0795597562e076437a21745e524b5c960b1edb68/components/ibm-components/commons/config/component.yaml)
/ [ibm-components](https://github.com/kubeflow/pipelines/tree/121bfdfc5f832948a799a6e93d7c1cd985c5d254/components/ibm-components) / [ffdl](https://github.com/kubeflow/pipelines/tree/121bfdfc5f832948a799a6e93d7c1cd985c5d254/components/ibm-components/ffdl) / [serve](https://github.com/kubeflow/pipelines/tree/121bfdfc5f832948a799a6e93d7c1cd985c5d254/components/ibm-components/ffdl/serve) / [Serve PyTorch Model - Seldon Core](https://raw.githubusercontent.com/kubeflow/pipelines/121bfdfc5f832948a799a6e93d7c1cd985c5d254/components/ibm-components/ffdl/serve/component.yaml)
/ [ibm-components](https://github.com/kubeflow/pipelines/tree/121bfdfc5f832948a799a6e93d7c1cd985c5d254/components/ibm-components) / [ffdl](https://github.com/kubeflow/pipelines/tree/121bfdfc5f832948a799a6e93d7c1cd985c5d254/components/ibm-components/ffdl) / [train](https://github.com/kubeflow/pipelines/tree/121bfdfc5f832948a799a6e93d7c1cd985c5d254/components/ibm-components/ffdl/train) / [Train Model - FfDL](https://raw.githubusercontent.com/kubeflow/pipelines/121bfdfc5f832948a799a6e93d7c1cd985c5d254/components/ibm-components/ffdl/train/component.yaml)
/ [ibm-components](https://github.com/kubeflow/pipelines/tree/121bfdfc5f832948a799a6e93d7c1cd985c5d254/components/ibm-components) / [spark](https://github.com/kubeflow/pipelines/tree/121bfdfc5f832948a799a6e93d7c1cd985c5d254/components/ibm-components/spark) / [data_preprocess_spark](https://github.com/kubeflow/pipelines/tree/121bfdfc5f832948a799a6e93d7c1cd985c5d254/components/ibm-components/spark/data_preprocess_spark) / [Preprocess Data using Spark - IBM Cloud](https://raw.githubusercontent.com/kubeflow/pipelines/121bfdfc5f832948a799a6e93d7c1cd985c5d254/components/ibm-components/spark/data_preprocess_spark/component.yaml)
/ [ibm-components](https://github.com/kubeflow/pipelines/tree/121bfdfc5f832948a799a6e93d7c1cd985c5d254/components/ibm-components) / [spark](https://github.com/kubeflow/pipelines/tree/121bfdfc5f832948a799a6e93d7c1cd985c5d254/components/ibm-components/spark) / [store_spark_model](https://github.com/kubeflow/pipelines/tree/121bfdfc5f832948a799a6e93d7c1cd985c5d254/components/ibm-components/spark/store_spark_model) / [Store Spark Model - Watson Machine Learning](https://raw.githubusercontent.com/kubeflow/pipelines/121bfdfc5f832948a799a6e93d7c1cd985c5d254/components/ibm-components/spark/store_spark_model/component.yaml)
/ [ibm-components](https://github.com/kubeflow/pipelines/tree/121bfdfc5f832948a799a6e93d7c1cd985c5d254/components/ibm-components) / [spark](https://github.com/kubeflow/pipelines/tree/121bfdfc5f832948a799a6e93d7c1cd985c5d254/components/ibm-components/spark) / [train_spark](https://github.com/kubeflow/pipelines/tree/121bfdfc5f832948a799a6e93d7c1cd985c5d254/components/ibm-components/spark/train_spark) / [Train Spark Model - IBM Cloud](https://raw.githubusercontent.com/kubeflow/pipelines/121bfdfc5f832948a799a6e93d7c1cd985c5d254/components/ibm-components/spark/train_spark/component.yaml)
/ [ibm-components](https://github.com/kubeflow/pipelines/tree/0795597562e076437a21745e524b5c960b1edb68/components/ibm-components) / [watson](https://github.com/kubeflow/pipelines/tree/0795597562e076437a21745e524b5c960b1edb68/components/ibm-components/watson) / [deploy](https://github.com/kubeflow/pipelines/tree/0795597562e076437a21745e524b5c960b1edb68/components/ibm-components/watson/deploy) / [Deploy Model - Watson Machine Learning](https://raw.githubusercontent.com/kubeflow/pipelines/0795597562e076437a21745e524b5c960b1edb68/components/ibm-components/watson/deploy/component.yaml)
/ [ibm-components](https://github.com/kubeflow/pipelines/tree/0795597562e076437a21745e524b5c960b1edb68/components/ibm-components) / [watson](https://github.com/kubeflow/pipelines/tree/0795597562e076437a21745e524b5c960b1edb68/components/ibm-components/watson) / [manage](https://github.com/kubeflow/pipelines/tree/0795597562e076437a21745e524b5c960b1edb68/components/ibm-components/watson/manage) / [monitor_fairness](https://github.com/kubeflow/pipelines/tree/0795597562e076437a21745e524b5c960b1edb68/components/ibm-components/watson/manage/monitor_fairness) / [Monitor Fairness - Watson OpenScale](https://raw.githubusercontent.com/kubeflow/pipelines/0795597562e076437a21745e524b5c960b1edb68/components/ibm-components/watson/manage/monitor_fairness/component.yaml)
/ [ibm-components](https://github.com/kubeflow/pipelines/tree/0795597562e076437a21745e524b5c960b1edb68/components/ibm-components) / [watson](https://github.com/kubeflow/pipelines/tree/0795597562e076437a21745e524b5c960b1edb68/components/ibm-components/watson) / [manage](https://github.com/kubeflow/pipelines/tree/0795597562e076437a21745e524b5c960b1edb68/components/ibm-components/watson/manage) / [monitor_quality](https://github.com/kubeflow/pipelines/tree/0795597562e076437a21745e524b5c960b1edb68/components/ibm-components/watson/manage/monitor_quality) / [Monitor quality - Watson OpenScale](https://raw.githubusercontent.com/kubeflow/pipelines/0795597562e076437a21745e524b5c960b1edb68/components/ibm-components/watson/manage/monitor_quality/component.yaml)
/ [ibm-components](https://github.com/kubeflow/pipelines/tree/121bfdfc5f832948a799a6e93d7c1cd985c5d254/components/ibm-components) / [watson](https://github.com/kubeflow/pipelines/tree/121bfdfc5f832948a799a6e93d7c1cd985c5d254/components/ibm-components/watson) / [manage](https://github.com/kubeflow/pipelines/tree/121bfdfc5f832948a799a6e93d7c1cd985c5d254/components/ibm-components/watson/manage) / [subscribe](https://github.com/kubeflow/pipelines/tree/121bfdfc5f832948a799a6e93d7c1cd985c5d254/components/ibm-components/watson/manage/subscribe) / [Subscribe - Watson OpenScale](https://raw.githubusercontent.com/kubeflow/pipelines/121bfdfc5f832948a799a6e93d7c1cd985c5d254/components/ibm-components/watson/manage/subscribe/component.yaml)
/ [ibm-components](https://github.com/kubeflow/pipelines/tree/0795597562e076437a21745e524b5c960b1edb68/components/ibm-components) / [watson](https://github.com/kubeflow/pipelines/tree/0795597562e076437a21745e524b5c960b1edb68/components/ibm-components/watson) / [store](https://github.com/kubeflow/pipelines/tree/0795597562e076437a21745e524b5c960b1edb68/components/ibm-components/watson/store) / [Store model - Watson Machine Learning](https://raw.githubusercontent.com/kubeflow/pipelines/0795597562e076437a21745e524b5c960b1edb68/components/ibm-components/watson/store/component.yaml)
/ [ibm-components](https://github.com/kubeflow/pipelines/tree/0795597562e076437a21745e524b5c960b1edb68/components/ibm-components) / [watson](https://github.com/kubeflow/pipelines/tree/0795597562e076437a21745e524b5c960b1edb68/components/ibm-components/watson) / [train](https://github.com/kubeflow/pipelines/tree/0795597562e076437a21745e524b5c960b1edb68/components/ibm-components/watson/train) / [Train Model - Watson Machine Learning](https://raw.githubusercontent.com/kubeflow/pipelines/0795597562e076437a21745e524b5c960b1edb68/components/ibm-components/watson/train/component.yaml)
/ [json](https://github.com/kubeflow/pipelines/tree/616c48babede5cde4ef20b6c35cea24c66b1c44d/components/json) / [Build_dict](https://github.com/kubeflow/pipelines/tree/616c48babede5cde4ef20b6c35cea24c66b1c44d/components/json/Build_dict) / [Build dict](https://raw.githubusercontent.com/kubeflow/pipelines/616c48babede5cde4ef20b6c35cea24c66b1c44d/components/json/Build_dict/component.yaml)
/ [json](https://github.com/kubeflow/pipelines/tree/616c48babede5cde4ef20b6c35cea24c66b1c44d/components/json) / [Build_list](https://github.com/kubeflow/pipelines/tree/616c48babede5cde4ef20b6c35cea24c66b1c44d/components/json/Build_list) / [Build list](https://raw.githubusercontent.com/kubeflow/pipelines/616c48babede5cde4ef20b6c35cea24c66b1c44d/components/json/Build_list/component.yaml)
/ [json](https://github.com/kubeflow/pipelines/tree/616c48babede5cde4ef20b6c35cea24c66b1c44d/components/json) / [Combine_lists](https://github.com/kubeflow/pipelines/tree/616c48babede5cde4ef20b6c35cea24c66b1c44d/components/json/Combine_lists) / [Combine lists](https://raw.githubusercontent.com/kubeflow/pipelines/616c48babede5cde4ef20b6c35cea24c66b1c44d/components/json/Combine_lists/component.yaml)
/ [json](https://github.com/kubeflow/pipelines/tree/b656fbf41e4801c4264ca3cb217a7913ac825a3d/components/json) / [Get_element_by_index](https://github.com/kubeflow/pipelines/tree/b656fbf41e4801c4264ca3cb217a7913ac825a3d/components/json/Get_element_by_index) / [Get element by index from JSON](https://raw.githubusercontent.com/kubeflow/pipelines/b656fbf41e4801c4264ca3cb217a7913ac825a3d/components/json/Get_element_by_index/component.yaml)
/ [json](https://github.com/kubeflow/pipelines/tree/b656fbf41e4801c4264ca3cb217a7913ac825a3d/components/json) / [Get_element_by_key](https://github.com/kubeflow/pipelines/tree/b656fbf41e4801c4264ca3cb217a7913ac825a3d/components/json/Get_element_by_key) / [Get element by key from JSON](https://raw.githubusercontent.com/kubeflow/pipelines/b656fbf41e4801c4264ca3cb217a7913ac825a3d/components/json/Get_element_by_key/component.yaml)
/ [json](https://github.com/kubeflow/pipelines/tree/b656fbf41e4801c4264ca3cb217a7913ac825a3d/components/json) / [Query](https://github.com/kubeflow/pipelines/tree/b656fbf41e4801c4264ca3cb217a7913ac825a3d/components/json/Query) / [Query JSON using JQ](https://raw.githubusercontent.com/kubeflow/pipelines/b656fbf41e4801c4264ca3cb217a7913ac825a3d/components/json/Query/component.yaml)
/ [keras](https://github.com/kubeflow/pipelines/tree/0795597562e076437a21745e524b5c960b1edb68/components/keras) / [Train_classifier](https://github.com/kubeflow/pipelines/tree/0795597562e076437a21745e524b5c960b1edb68/components/keras/Train_classifier) / [from_CSV](https://github.com/kubeflow/pipelines/tree/0795597562e076437a21745e524b5c960b1edb68/components/keras/Train_classifier/from_CSV) / [Keras train classifier from csv](https://raw.githubusercontent.com/kubeflow/pipelines/0795597562e076437a21745e524b5c960b1edb68/components/keras/Train_classifier/from_CSV/component.yaml)
/ [kubeflow](https://github.com/kubeflow/pipelines/tree/8b3d741c6ef9f80190c962d4640690ea723b71e9/components/kubeflow) / [deployer](https://github.com/kubeflow/pipelines/tree/8b3d741c6ef9f80190c962d4640690ea723b71e9/components/kubeflow/deployer) / [Kubeflow - Serve TF model](https://raw.githubusercontent.com/kubeflow/pipelines/1.8.0-alpha.0/components/kubeflow/deployer/component.yaml)
/ [kubeflow](https://github.com/kubeflow/pipelines/tree/8b3d741c6ef9f80190c962d4640690ea723b71e9/components/kubeflow) / [dnntrainer](https://github.com/kubeflow/pipelines/tree/8b3d741c6ef9f80190c962d4640690ea723b71e9/components/kubeflow/dnntrainer) / [Train FC DNN using TF](https://raw.githubusercontent.com/kubeflow/pipelines/1.8.0-alpha.0/components/kubeflow/dnntrainer/component.yaml)
/ [kubeflow](https://github.com/kubeflow/pipelines/tree/34d23aa924720ead13fe67ebca5c1ab9926585ee/components/kubeflow) / [katib-launcher](https://github.com/kubeflow/pipelines/tree/34d23aa924720ead13fe67ebca5c1ab9926585ee/components/kubeflow/katib-launcher) / [Katib - Launch Experiment](https://raw.githubusercontent.com/kubeflow/pipelines/34d23aa924720ead13fe67ebca5c1ab9926585ee/components/kubeflow/katib-launcher/component.yaml)
/ [kubeflow](https://github.com/kubeflow/pipelines/tree/65bed9b6d1d676ef2d541a970d3edc0aee12400d/components/kubeflow) / [kfserving](https://github.com/kubeflow/pipelines/tree/65bed9b6d1d676ef2d541a970d3edc0aee12400d/components/kubeflow/kfserving) / [Kubeflow - Serve Model using KFServing](https://raw.githubusercontent.com/kubeflow/pipelines/65bed9b6d1d676ef2d541a970d3edc0aee12400d/components/kubeflow/kfserving/component.yaml)
/ [kubeflow](https://github.com/kubeflow/pipelines/tree/dd31142a57053e0b6f1416a3ecb4c8a94faa27f9/components/kubeflow) / [launcher](https://github.com/kubeflow/pipelines/tree/dd31142a57053e0b6f1416a3ecb4c8a94faa27f9/components/kubeflow/launcher) / [Kubeflow - Launch TFJob](https://raw.githubusercontent.com/kubeflow/pipelines/dd31142a57053e0b6f1416a3ecb4c8a94faa27f9/components/kubeflow/launcher/component.yaml)
/ [local](https://github.com/kubeflow/pipelines/tree/8b3d741c6ef9f80190c962d4640690ea723b71e9/components/local) / [confusion_matrix](https://github.com/kubeflow/pipelines/tree/8b3d741c6ef9f80190c962d4640690ea723b71e9/components/local/confusion_matrix) / [Confusion matrix](https://raw.githubusercontent.com/kubeflow/pipelines/1.8.0-alpha.0/components/local/confusion_matrix/component.yaml)
/ [local](https://github.com/kubeflow/pipelines/tree/8b3d741c6ef9f80190c962d4640690ea723b71e9/components/local) / [roc](https://github.com/kubeflow/pipelines/tree/8b3d741c6ef9f80190c962d4640690ea723b71e9/components/local/roc) / [ROC curve](https://raw.githubusercontent.com/kubeflow/pipelines/1.8.0-alpha.0/components/local/roc/component.yaml)
/ [ml_metrics](https://github.com/kubeflow/pipelines/tree/0795597562e076437a21745e524b5c960b1edb68/components/ml_metrics) / [Aggregate_regression_metrics](https://github.com/kubeflow/pipelines/tree/0795597562e076437a21745e524b5c960b1edb68/components/ml_metrics/Aggregate_regression_metrics) / [Aggregate regression metrics](https://raw.githubusercontent.com/kubeflow/pipelines/0795597562e076437a21745e524b5c960b1edb68/components/ml_metrics/Aggregate_regression_metrics/component.yaml)
/ [ml_metrics](https://github.com/kubeflow/pipelines/tree/0795597562e076437a21745e524b5c960b1edb68/components/ml_metrics) / [Calculate_classification_metrics](https://github.com/kubeflow/pipelines/tree/0795597562e076437a21745e524b5c960b1edb68/components/ml_metrics/Calculate_classification_metrics) / [from_CSV](https://github.com/kubeflow/pipelines/tree/0795597562e076437a21745e524b5c960b1edb68/components/ml_metrics/Calculate_classification_metrics/from_CSV) / [Calculate classification metrics from csv](https://raw.githubusercontent.com/kubeflow/pipelines/0795597562e076437a21745e524b5c960b1edb68/components/ml_metrics/Calculate_classification_metrics/from_CSV/component.yaml)
/ [ml_metrics](https://github.com/kubeflow/pipelines/tree/0795597562e076437a21745e524b5c960b1edb68/components/ml_metrics) / [Calculate_regression_metrics](https://github.com/kubeflow/pipelines/tree/0795597562e076437a21745e524b5c960b1edb68/components/ml_metrics/Calculate_regression_metrics) / [from_CSV](https://github.com/kubeflow/pipelines/tree/0795597562e076437a21745e524b5c960b1edb68/components/ml_metrics/Calculate_regression_metrics/from_CSV) / [Calculate regression metrics from csv](https://raw.githubusercontent.com/kubeflow/pipelines/0795597562e076437a21745e524b5c960b1edb68/components/ml_metrics/Calculate_regression_metrics/from_CSV/component.yaml)
/ [notebooks](https://github.com/kubeflow/pipelines/tree/b656fbf41e4801c4264ca3cb217a7913ac825a3d/components/notebooks) / [Run_notebook_using_papermill](https://github.com/kubeflow/pipelines/tree/b656fbf41e4801c4264ca3cb217a7913ac825a3d/components/notebooks/Run_notebook_using_papermill) / [Run notebook using papermill](https://raw.githubusercontent.com/kubeflow/pipelines/b656fbf41e4801c4264ca3cb217a7913ac825a3d/components/notebooks/Run_notebook_using_papermill/component.yaml)
/ [nuclio](https://github.com/kubeflow/pipelines/tree/0795597562e076437a21745e524b5c960b1edb68/components/nuclio) / [delete](https://github.com/kubeflow/pipelines/tree/0795597562e076437a21745e524b5c960b1edb68/components/nuclio/delete) / [nuclio delete](https://raw.githubusercontent.com/kubeflow/pipelines/0795597562e076437a21745e524b5c960b1edb68/components/nuclio/delete/component.yaml)
/ [nuclio](https://github.com/kubeflow/pipelines/tree/0795597562e076437a21745e524b5c960b1edb68/components/nuclio) / [deploy](https://github.com/kubeflow/pipelines/tree/0795597562e076437a21745e524b5c960b1edb68/components/nuclio/deploy) / [nuclio deploy](https://raw.githubusercontent.com/kubeflow/pipelines/0795597562e076437a21745e524b5c960b1edb68/components/nuclio/deploy/component.yaml)
/ [nuclio](https://github.com/kubeflow/pipelines/tree/0795597562e076437a21745e524b5c960b1edb68/components/nuclio) / [invoker](https://github.com/kubeflow/pipelines/tree/0795597562e076437a21745e524b5c960b1edb68/components/nuclio/invoker) / [nuclio invoker](https://raw.githubusercontent.com/kubeflow/pipelines/0795597562e076437a21745e524b5c960b1edb68/components/nuclio/invoker/component.yaml)
/ [pandas](https://github.com/kubeflow/pipelines/tree/0795597562e076437a21745e524b5c960b1edb68/components/pandas) / [Transform_DataFrame](https://github.com/kubeflow/pipelines/tree/0795597562e076437a21745e524b5c960b1edb68/components/pandas/Transform_DataFrame) / [in_ApacheParquet_format](https://github.com/kubeflow/pipelines/tree/0795597562e076437a21745e524b5c960b1edb68/components/pandas/Transform_DataFrame/in_ApacheParquet_format) / [Pandas Transform DataFrame in ApacheParquet format](https://raw.githubusercontent.com/kubeflow/pipelines/0795597562e076437a21745e524b5c960b1edb68/components/pandas/Transform_DataFrame/in_ApacheParquet_format/component.yaml)
/ [pandas](https://github.com/kubeflow/pipelines/tree/0795597562e076437a21745e524b5c960b1edb68/components/pandas) / [Transform_DataFrame](https://github.com/kubeflow/pipelines/tree/0795597562e076437a21745e524b5c960b1edb68/components/pandas/Transform_DataFrame) / [in_CSV_format](https://github.com/kubeflow/pipelines/tree/0795597562e076437a21745e524b5c960b1edb68/components/pandas/Transform_DataFrame/in_CSV_format) / [Pandas Transform DataFrame in CSV format](https://raw.githubusercontent.com/kubeflow/pipelines/0795597562e076437a21745e524b5c960b1edb68/components/pandas/Transform_DataFrame/in_CSV_format/component.yaml)
/ [presto](https://github.com/kubeflow/pipelines/tree/0795597562e076437a21745e524b5c960b1edb68/components/presto) / [query](https://github.com/kubeflow/pipelines/tree/0795597562e076437a21745e524b5c960b1edb68/components/presto/query) / [Presto Query](https://raw.githubusercontent.com/kubeflow/pipelines/0795597562e076437a21745e524b5c960b1edb68/components/presto/query/component.yaml)
/ [sample](https://github.com/kubeflow/pipelines/tree/c5e6ca23176355e3c01b6a778bfed46d3d041be8/components/sample) / [C%23_script](https://github.com/kubeflow/pipelines/tree/c5e6ca23176355e3c01b6a778bfed46d3d041be8/components/sample/C%23_script) / [Filter text](https://raw.githubusercontent.com/kubeflow/pipelines/c5e6ca23176355e3c01b6a778bfed46d3d041be8/components/sample/C%23_script/component.yaml)
/ [sample](https://github.com/kubeflow/pipelines/tree/0795597562e076437a21745e524b5c960b1edb68/components/sample) / [keras](https://github.com/kubeflow/pipelines/tree/0795597562e076437a21745e524b5c960b1edb68/components/sample/keras) / [train_classifier](https://github.com/kubeflow/pipelines/tree/0795597562e076437a21745e524b5c960b1edb68/components/sample/keras/train_classifier) / [Keras - Train classifier](https://raw.githubusercontent.com/kubeflow/pipelines/0795597562e076437a21745e524b5c960b1edb68/components/sample/keras/train_classifier/component.yaml)
/ [sample](https://github.com/kubeflow/pipelines/tree/3826edfcde77eb92a47dea1b9241a657236f45bc/components/sample) / [R_script](https://github.com/kubeflow/pipelines/tree/3826edfcde77eb92a47dea1b9241a657236f45bc/components/sample/R_script) / [Filter text](https://raw.githubusercontent.com/kubeflow/pipelines/3826edfcde77eb92a47dea1b9241a657236f45bc/components/sample/R_script/component.yaml)
/ [sample](https://github.com/kubeflow/pipelines/tree/1417e5e794103164a2836f86116666ef965bf1f5/components/sample) / [Shell_script](https://github.com/kubeflow/pipelines/tree/1417e5e794103164a2836f86116666ef965bf1f5/components/sample/Shell_script) / [Filter text using shell and grep](https://raw.githubusercontent.com/kubeflow/pipelines/1417e5e794103164a2836f86116666ef965bf1f5/components/sample/Shell_script/component.yaml)
/ [tables](https://github.com/kubeflow/pipelines/tree/0795597562e076437a21745e524b5c960b1edb68/components/tables) / [Remove_header](https://github.com/kubeflow/pipelines/tree/0795597562e076437a21745e524b5c960b1edb68/components/tables/Remove_header) / [Remove header](https://raw.githubusercontent.com/kubeflow/pipelines/0795597562e076437a21745e524b5c960b1edb68/components/tables/Remove_header/component.yaml)
/ [tensorflow](https://github.com/kubeflow/pipelines/tree/0795597562e076437a21745e524b5c960b1edb68/components/tensorflow) / [tensorboard](https://github.com/kubeflow/pipelines/tree/0795597562e076437a21745e524b5c960b1edb68/components/tensorflow/tensorboard) / [prepare_tensorboard](https://github.com/kubeflow/pipelines/tree/0795597562e076437a21745e524b5c960b1edb68/components/tensorflow/tensorboard/prepare_tensorboard) / [Create Tensorboard visualization](https://raw.githubusercontent.com/kubeflow/pipelines/0795597562e076437a21745e524b5c960b1edb68/components/tensorflow/tensorboard/prepare_tensorboard/component.yaml)
/ [web](https://github.com/kubeflow/pipelines/tree/0795597562e076437a21745e524b5c960b1edb68/components/web) / [Download](https://github.com/kubeflow/pipelines/tree/0795597562e076437a21745e524b5c960b1edb68/components/web/Download) / [Download data](https://raw.githubusercontent.com/kubeflow/pipelines/0795597562e076437a21745e524b5c960b1edb68/components/web/Download/component.yaml)
/ [XGBoost](https://github.com/kubeflow/pipelines/tree/0795597562e076437a21745e524b5c960b1edb68/components/XGBoost) / [Cross_validation_for_regression](https://github.com/kubeflow/pipelines/tree/0795597562e076437a21745e524b5c960b1edb68/components/XGBoost/Cross_validation_for_regression) / [from_CSV](https://github.com/kubeflow/pipelines/tree/0795597562e076437a21745e524b5c960b1edb68/components/XGBoost/Cross_validation_for_regression/from_CSV) / [Xgboost 5 fold cross validation for regression](https://raw.githubusercontent.com/kubeflow/pipelines/0795597562e076437a21745e524b5c960b1edb68/components/XGBoost/Cross_validation_for_regression/from_CSV/component.yaml)
/ [XGBoost](https://github.com/kubeflow/pipelines/tree/0795597562e076437a21745e524b5c960b1edb68/components/XGBoost) / [Predict](https://github.com/kubeflow/pipelines/tree/0795597562e076437a21745e524b5c960b1edb68/components/XGBoost/Predict) / [Xgboost predict](https://raw.githubusercontent.com/kubeflow/pipelines/0795597562e076437a21745e524b5c960b1edb68/components/XGBoost/Predict/component.yaml)
/ [XGBoost](https://github.com/kubeflow/pipelines/tree/0795597562e076437a21745e524b5c960b1edb68/components/XGBoost) / [Predict](https://github.com/kubeflow/pipelines/tree/0795597562e076437a21745e524b5c960b1edb68/components/XGBoost/Predict) / [from_ApacheParquet](https://github.com/kubeflow/pipelines/tree/0795597562e076437a21745e524b5c960b1edb68/components/XGBoost/Predict/from_ApacheParquet) / [Xgboost predict](https://raw.githubusercontent.com/kubeflow/pipelines/0795597562e076437a21745e524b5c960b1edb68/components/XGBoost/Predict/from_ApacheParquet/component.yaml)
/ [XGBoost](https://github.com/kubeflow/pipelines/tree/0795597562e076437a21745e524b5c960b1edb68/components/XGBoost) / [Train](https://github.com/kubeflow/pipelines/tree/0795597562e076437a21745e524b5c960b1edb68/components/XGBoost/Train) / [Xgboost train](https://raw.githubusercontent.com/kubeflow/pipelines/0795597562e076437a21745e524b5c960b1edb68/components/XGBoost/Train/component.yaml)
/ [XGBoost](https://github.com/kubeflow/pipelines/tree/0795597562e076437a21745e524b5c960b1edb68/components/XGBoost) / [Train](https://github.com/kubeflow/pipelines/tree/0795597562e076437a21745e524b5c960b1edb68/components/XGBoost/Train) / [from_ApacheParquet](https://github.com/kubeflow/pipelines/tree/0795597562e076437a21745e524b5c960b1edb68/components/XGBoost/Train/from_ApacheParquet) / [Xgboost train](https://raw.githubusercontent.com/kubeflow/pipelines/0795597562e076437a21745e524b5c960b1edb68/components/XGBoost/Train/from_ApacheParquet/component.yaml)
/ [XGBoost](https://github.com/kubeflow/pipelines/tree/0795597562e076437a21745e524b5c960b1edb68/components/XGBoost) / [Train_and_cross-validate_regression](https://github.com/kubeflow/pipelines/tree/0795597562e076437a21745e524b5c960b1edb68/components/XGBoost/Train_and_cross-validate_regression) / [from_CSV](https://github.com/kubeflow/pipelines/tree/0795597562e076437a21745e524b5c960b1edb68/components/XGBoost/Train_and_cross-validate_regression/from_CSV) / [Xgboost train and cv regression on csv](https://raw.githubusercontent.com/kubeflow/pipelines/0795597562e076437a21745e524b5c960b1edb68/components/XGBoost/Train_and_cross-validate_regression/from_CSV/component.yaml)
/ [XGBoost](https://github.com/kubeflow/pipelines/tree/0795597562e076437a21745e524b5c960b1edb68/components/XGBoost) / [Train_regression_and_calculate_metrics](https://github.com/kubeflow/pipelines/tree/0795597562e076437a21745e524b5c960b1edb68/components/XGBoost/Train_regression_and_calculate_metrics) / [from_CSV](https://github.com/kubeflow/pipelines/tree/0795597562e076437a21745e524b5c960b1edb68/components/XGBoost/Train_regression_and_calculate_metrics/from_CSV) / [Xgboost train regression and calculate metrics on csv](https://raw.githubusercontent.com/kubeflow/pipelines/0795597562e076437a21745e524b5c960b1edb68/components/XGBoost/Train_regression_and_calculate_metrics/from_CSV/component.yaml)
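Each component.yaml linked above is a self-contained component specification that the KFP SDK can load directly from its URL. A minimal sketch of that workflow (assuming the KFP v1 SDK; the dataset URL and pipeline name are placeholders, and the two component URLs are taken from the listing above):

from kfp import components, dsl

# Load two of the components listed above straight from their component.yaml URLs.
download_op = components.load_component_from_url(
    'https://raw.githubusercontent.com/kubeflow/pipelines/0795597562e076437a21745e524b5c960b1edb68/components/web/Download/component.yaml')
xgboost_train_op = components.load_component_from_url(
    'https://raw.githubusercontent.com/kubeflow/pipelines/0795597562e076437a21745e524b5c960b1edb68/components/XGBoost/Train/component.yaml')

@dsl.pipeline(name='xgboost-train-from-listing')
def xgboost_train_pipeline(data_url: str = 'https://example.com/training.csv'):  # placeholder URL
    # Chain the downloaded CSV into the trainer; argument names follow each component.yaml.
    training_data = download_op(data_url).output
    xgboost_train_op(
        training_data=training_data,
        label_column=0,
        objective='reg:squarederror',
    )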
@ -1,71 +0,0 @@
from collections import OrderedDict
from kfp import components


# Load the reusable components that the cross-validation graph is built from.
split_table_into_folds_op = components.load_component_from_url('https://raw.githubusercontent.com/kubeflow/pipelines/e9b4b29b22a5120daf95b581b0392cd461a906f0/components/dataset_manipulation/split_data_into_folds/in_CSV/component.yaml')
xgboost_train_on_csv_op = components.load_component_from_url('https://raw.githubusercontent.com/kubeflow/pipelines/567c04c51ff00a1ee525b3458425b17adbe3df61/components/XGBoost/Train/component.yaml')
xgboost_predict_on_csv_op = components.load_component_from_url('https://raw.githubusercontent.com/kubeflow/pipelines/567c04c51ff00a1ee525b3458425b17adbe3df61/components/XGBoost/Predict/component.yaml')
pandas_transform_csv_op = components.load_component_from_url('https://raw.githubusercontent.com/kubeflow/pipelines/6162d55998b176b50267d351241100bb0ee715bc/components/pandas/Transform_DataFrame/in_CSV_format/component.yaml')
drop_header_op = components.load_component_from_url('https://raw.githubusercontent.com/kubeflow/pipelines/02c9638287468c849632cf9f7885b51de4c66f86/components/tables/Remove_header/component.yaml')
calculate_regression_metrics_from_csv_op = components.load_component_from_url('https://raw.githubusercontent.com/kubeflow/pipelines/7da1ac9464b4b3e7d95919faa2f1107a9635b7e4/components/ml_metrics/Calculate_regression_metrics/from_CSV/component.yaml')
aggregate_regression_metrics_op = components.load_component_from_url('https://raw.githubusercontent.com/kubeflow/pipelines/7ea9363fe201918d419fecdc00d1275e657ff712/components/ml_metrics/Aggregate_regression_metrics/component.yaml')


def xgboost_5_fold_cross_validation_for_regression(
    data: 'CSV',
    label_column: int = 0,
    objective: str = 'reg:squarederror',
    num_iterations: int = 200,
):
    # Split the dataset into 5 train/test fold pairs.
    folds = split_table_into_folds_op(data).outputs

    fold_metrics = {}
    for i in range(1, 6):
        training_data = folds['train_' + str(i)]
        testing_data = folds['test_' + str(i)]
        # Train on the training split of this fold.
        model = xgboost_train_on_csv_op(
            training_data=training_data,
            label_column=label_column,
            objective=objective,
            num_iterations=num_iterations,
        ).outputs['model']

        # Predict on the held-out split.
        predictions = xgboost_predict_on_csv_op(
            data=testing_data,
            model=model,
            label_column=label_column,
        ).output

        # Extract the true label column ("tips") from the held-out split.
        true_values_table = pandas_transform_csv_op(
            table=testing_data,
            transform_code='df = df[["tips"]]',
        ).output

        true_values = drop_header_op(true_values_table).output

        # Score the fold against the true labels.
        metrics = calculate_regression_metrics_from_csv_op(
            true_values=true_values,
            predicted_values=predictions,
        ).outputs['metrics']

        fold_metrics['metrics_' + str(i)] = metrics

    # Aggregate the per-fold metrics into the final cross-validation scores.
    aggregated_metrics_task = aggregate_regression_metrics_op(**fold_metrics)

    return OrderedDict([
        ('mean_absolute_error', aggregated_metrics_task.outputs['mean_absolute_error']),
        ('mean_squared_error', aggregated_metrics_task.outputs['mean_squared_error']),
        ('root_mean_squared_error', aggregated_metrics_task.outputs['root_mean_squared_error']),
        ('metrics', aggregated_metrics_task.outputs['metrics']),
    ])


if __name__ == '__main__':
    xgboost_5_fold_cross_validation_for_regression_op = components.create_graph_component_from_pipeline_func(
        xgboost_5_fold_cross_validation_for_regression,
        output_component_file='component.yaml',
        annotations={
            "author": "Alexey Volkov <alexey.volkov@ark-kun.com>",
            "canonical_location": "https://raw.githubusercontent.com/Ark-kun/pipeline_components/master/components/XGBoost/Cross_validation_for_regression/from_CSV/component.yaml",
        },
    )
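The component.yaml emitted by this script is reproduced below; note how the Python loop over range(1, 6) is unrolled into five explicit task groups in the graph implementation. Once generated, the graph component can be consumed like any other component. A minimal sketch (assuming the KFP v1 SDK, which resolves graph components on load; the consuming function is illustrative):

from kfp import components

# Hypothetical consumer of the generated graph component.
xgboost_cv_op = components.load_component_from_file('component.yaml')

def evaluate(data: 'CSV'):
    cv_task = xgboost_cv_op(
        data=data,
        label_column=0,
        objective='reg:squarederror',
        num_iterations=200,
    )
    # The aggregated cross-validation scores declared in the component's
    # outputs section are exposed as task outputs.
    return cv_task.outputs['root_mean_squared_error']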
@ -1,276 +0,0 @@
name: Xgboost 5 fold cross validation for regression
inputs:
- {name: data, type: CSV}
- {name: label_column, type: Integer, default: '0', optional: true}
- {name: objective, type: String, default: 'reg:squarederror', optional: true}
- {name: num_iterations, type: Integer, default: '200', optional: true}
outputs:
- {name: mean_absolute_error, type: Float}
- {name: mean_squared_error, type: Float}
- {name: root_mean_squared_error, type: Float}
- {name: metrics, type: JsonObject}
metadata:
  annotations:
    author: Alexey Volkov <alexey.volkov@ark-kun.com>
    canonical_location: 'https://raw.githubusercontent.com/Ark-kun/pipeline_components/master/components/XGBoost/Cross_validation_for_regression/from_CSV/component.yaml'
implementation:
  graph:
    tasks:
      Split table into folds:
        componentRef: {digest: 9956223bcecc7294ca1afac39b60ada4a935a571d817c3dfbf2ea4a211afe3d1, url: 'https://raw.githubusercontent.com/kubeflow/pipelines/e9b4b29b22a5120daf95b581b0392cd461a906f0/components/dataset_manipulation/split_data_into_folds/in_CSV/component.yaml'}
        arguments:
          table:
            graphInput: {inputName: data}
      Xgboost train:
        componentRef: {digest: 09b80053da29f8f51575b42e5d2e8ad4b7bdcc92a02c3744e189b1f597006b38, url: 'https://raw.githubusercontent.com/kubeflow/pipelines/567c04c51ff00a1ee525b3458425b17adbe3df61/components/XGBoost/Train/component.yaml'}
        arguments:
          training_data:
            taskOutput: {outputName: train_1, taskId: Split table into folds, type: CSV}
          label_column:
            graphInput: {inputName: label_column}
          num_iterations:
            graphInput: {inputName: num_iterations}
          objective:
            graphInput: {inputName: objective}
      Xgboost predict:
        componentRef: {digest: ecdfaf32cff15b6abc3d0dd80365ce00577f1a19a058fbe201f515431cea1357, url: 'https://raw.githubusercontent.com/kubeflow/pipelines/567c04c51ff00a1ee525b3458425b17adbe3df61/components/XGBoost/Predict/component.yaml'}
        arguments:
          data:
            taskOutput: {outputName: test_1, taskId: Split table into folds, type: CSV}
          model:
            taskOutput: {outputName: model, taskId: Xgboost train, type: XGBoostModel}
          label_column:
            graphInput: {inputName: label_column}
      Pandas Transform DataFrame in CSV format:
        componentRef: {digest: 58dc88349157bf128021708c316ce4eb60bc1de0a5a7dd3af45fabac3276d510, url: 'https://raw.githubusercontent.com/kubeflow/pipelines/6162d55998b176b50267d351241100bb0ee715bc/components/pandas/Transform_DataFrame/in_CSV_format/component.yaml'}
        arguments:
          table:
            taskOutput: {outputName: test_1, taskId: Split table into folds, type: CSV}
          transform_code: df = df[["tips"]]
      Remove header:
        componentRef: {digest: ba35ffea863855b956c3c50aefa0420ba3823949a6c059e6e3971cde960dc5a3, url: 'https://raw.githubusercontent.com/kubeflow/pipelines/02c9638287468c849632cf9f7885b51de4c66f86/components/tables/Remove_header/component.yaml'}
        arguments:
          table:
            taskOutput: {outputName: transformed_table, taskId: Pandas Transform DataFrame in CSV format, type: CSV}
      Calculate regression metrics from csv:
        componentRef: {digest: e3ecbfeb18032820edfee4255e2fb6d15d15ed224e166519d5e528e12053a995, url: 'https://raw.githubusercontent.com/kubeflow/pipelines/7da1ac9464b4b3e7d95919faa2f1107a9635b7e4/components/ml_metrics/Calculate_regression_metrics/from_CSV/component.yaml'}
        arguments:
          true_values:
            taskOutput: {outputName: table, taskId: Remove header}
          predicted_values:
            taskOutput: {outputName: predictions, taskId: Xgboost predict, type: Text}
      Xgboost train 2:
        componentRef: {digest: 09b80053da29f8f51575b42e5d2e8ad4b7bdcc92a02c3744e189b1f597006b38, url: 'https://raw.githubusercontent.com/kubeflow/pipelines/567c04c51ff00a1ee525b3458425b17adbe3df61/components/XGBoost/Train/component.yaml'}
        arguments:
          training_data:
            taskOutput: {outputName: train_2, taskId: Split table into folds, type: CSV}
          label_column:
            graphInput: {inputName: label_column}
          num_iterations:
            graphInput: {inputName: num_iterations}
          objective:
            graphInput: {inputName: objective}
      Xgboost predict 2:
        componentRef: {digest: ecdfaf32cff15b6abc3d0dd80365ce00577f1a19a058fbe201f515431cea1357, url: 'https://raw.githubusercontent.com/kubeflow/pipelines/567c04c51ff00a1ee525b3458425b17adbe3df61/components/XGBoost/Predict/component.yaml'}
        arguments:
          data:
            taskOutput: {outputName: test_2, taskId: Split table into folds, type: CSV}
          model:
            taskOutput: {outputName: model, taskId: Xgboost train 2, type: XGBoostModel}
          label_column:
            graphInput: {inputName: label_column}
      Pandas Transform DataFrame in CSV format 2:
        componentRef: {digest: 58dc88349157bf128021708c316ce4eb60bc1de0a5a7dd3af45fabac3276d510, url: 'https://raw.githubusercontent.com/kubeflow/pipelines/6162d55998b176b50267d351241100bb0ee715bc/components/pandas/Transform_DataFrame/in_CSV_format/component.yaml'}
        arguments:
          table:
            taskOutput: {outputName: test_2, taskId: Split table into folds, type: CSV}
          transform_code: df = df[["tips"]]
      Remove header 2:
        componentRef: {digest: ba35ffea863855b956c3c50aefa0420ba3823949a6c059e6e3971cde960dc5a3, url: 'https://raw.githubusercontent.com/kubeflow/pipelines/02c9638287468c849632cf9f7885b51de4c66f86/components/tables/Remove_header/component.yaml'}
        arguments:
          table:
            taskOutput: {outputName: transformed_table, taskId: Pandas Transform DataFrame in CSV format 2, type: CSV}
      Calculate regression metrics from csv 2:
        componentRef: {digest: e3ecbfeb18032820edfee4255e2fb6d15d15ed224e166519d5e528e12053a995, url: 'https://raw.githubusercontent.com/kubeflow/pipelines/7da1ac9464b4b3e7d95919faa2f1107a9635b7e4/components/ml_metrics/Calculate_regression_metrics/from_CSV/component.yaml'}
        arguments:
          true_values:
            taskOutput: {outputName: table, taskId: Remove header 2}
          predicted_values:
            taskOutput: {outputName: predictions, taskId: Xgboost predict 2, type: Text}
      Xgboost train 3:
        componentRef: {digest: 09b80053da29f8f51575b42e5d2e8ad4b7bdcc92a02c3744e189b1f597006b38, url: 'https://raw.githubusercontent.com/kubeflow/pipelines/567c04c51ff00a1ee525b3458425b17adbe3df61/components/XGBoost/Train/component.yaml'}
        arguments:
          training_data:
            taskOutput: {outputName: train_3, taskId: Split table into folds, type: CSV}
          label_column:
            graphInput: {inputName: label_column}
          num_iterations:
            graphInput: {inputName: num_iterations}
          objective:
            graphInput: {inputName: objective}
      Xgboost predict 3:
        componentRef: {digest: ecdfaf32cff15b6abc3d0dd80365ce00577f1a19a058fbe201f515431cea1357, url: 'https://raw.githubusercontent.com/kubeflow/pipelines/567c04c51ff00a1ee525b3458425b17adbe3df61/components/XGBoost/Predict/component.yaml'}
        arguments:
          data:
            taskOutput: {outputName: test_3, taskId: Split table into folds, type: CSV}
          model:
            taskOutput: {outputName: model, taskId: Xgboost train 3, type: XGBoostModel}
          label_column:
            graphInput: {inputName: label_column}
      Pandas Transform DataFrame in CSV format 3:
        componentRef: {digest: 58dc88349157bf128021708c316ce4eb60bc1de0a5a7dd3af45fabac3276d510, url: 'https://raw.githubusercontent.com/kubeflow/pipelines/6162d55998b176b50267d351241100bb0ee715bc/components/pandas/Transform_DataFrame/in_CSV_format/component.yaml'}
        arguments:
          table:
            taskOutput: {outputName: test_3, taskId: Split table into folds, type: CSV}
          transform_code: df = df[["tips"]]
      Remove header 3:
        componentRef: {digest: ba35ffea863855b956c3c50aefa0420ba3823949a6c059e6e3971cde960dc5a3, url: 'https://raw.githubusercontent.com/kubeflow/pipelines/02c9638287468c849632cf9f7885b51de4c66f86/components/tables/Remove_header/component.yaml'}
        arguments:
          table:
            taskOutput: {outputName: transformed_table, taskId: Pandas Transform DataFrame in CSV format 3, type: CSV}
      Calculate regression metrics from csv 3:
        componentRef: {digest: e3ecbfeb18032820edfee4255e2fb6d15d15ed224e166519d5e528e12053a995, url: 'https://raw.githubusercontent.com/kubeflow/pipelines/7da1ac9464b4b3e7d95919faa2f1107a9635b7e4/components/ml_metrics/Calculate_regression_metrics/from_CSV/component.yaml'}
        arguments:
          true_values:
            taskOutput: {outputName: table, taskId: Remove header 3}
          predicted_values:
            taskOutput: {outputName: predictions, taskId: Xgboost predict 3, type: Text}
      Xgboost train 4:
        componentRef: {digest: 09b80053da29f8f51575b42e5d2e8ad4b7bdcc92a02c3744e189b1f597006b38, url: 'https://raw.githubusercontent.com/kubeflow/pipelines/567c04c51ff00a1ee525b3458425b17adbe3df61/components/XGBoost/Train/component.yaml'}
        arguments:
          training_data:
            taskOutput: {outputName: train_4, taskId: Split table into folds, type: CSV}
          label_column:
            graphInput: {inputName: label_column}
          num_iterations:
            graphInput: {inputName: num_iterations}
          objective:
            graphInput: {inputName: objective}
      Xgboost predict 4:
        componentRef: {digest: ecdfaf32cff15b6abc3d0dd80365ce00577f1a19a058fbe201f515431cea1357, url: 'https://raw.githubusercontent.com/kubeflow/pipelines/567c04c51ff00a1ee525b3458425b17adbe3df61/components/XGBoost/Predict/component.yaml'}
        arguments:
          data:
            taskOutput: {outputName: test_4, taskId: Split table into folds, type: CSV}
          model:
            taskOutput: {outputName: model, taskId: Xgboost train 4, type: XGBoostModel}
          label_column:
            graphInput: {inputName: label_column}
      Pandas Transform DataFrame in CSV format 4:
        componentRef: {digest: 58dc88349157bf128021708c316ce4eb60bc1de0a5a7dd3af45fabac3276d510, url: 'https://raw.githubusercontent.com/kubeflow/pipelines/6162d55998b176b50267d351241100bb0ee715bc/components/pandas/Transform_DataFrame/in_CSV_format/component.yaml'}
        arguments:
          table:
            taskOutput: {outputName: test_4, taskId: Split table into folds, type: CSV}
          transform_code: df = df[["tips"]]
      Remove header 4:
        componentRef: {digest: ba35ffea863855b956c3c50aefa0420ba3823949a6c059e6e3971cde960dc5a3, url: 'https://raw.githubusercontent.com/kubeflow/pipelines/02c9638287468c849632cf9f7885b51de4c66f86/components/tables/Remove_header/component.yaml'}
        arguments:
          table:
            taskOutput: {outputName: transformed_table, taskId: Pandas Transform DataFrame in CSV format 4, type: CSV}
      Calculate regression metrics from csv 4:
        componentRef: {digest: e3ecbfeb18032820edfee4255e2fb6d15d15ed224e166519d5e528e12053a995, url: 'https://raw.githubusercontent.com/kubeflow/pipelines/7da1ac9464b4b3e7d95919faa2f1107a9635b7e4/components/ml_metrics/Calculate_regression_metrics/from_CSV/component.yaml'}
        arguments:
          true_values:
            taskOutput: {outputName: table, taskId: Remove header 4}
          predicted_values:
            taskOutput: {outputName: predictions, taskId: Xgboost predict 4, type: Text}
      Xgboost train 5:
        componentRef: {digest: 09b80053da29f8f51575b42e5d2e8ad4b7bdcc92a02c3744e189b1f597006b38, url: 'https://raw.githubusercontent.com/kubeflow/pipelines/567c04c51ff00a1ee525b3458425b17adbe3df61/components/XGBoost/Train/component.yaml'}
        arguments:
          training_data:
            taskOutput: {outputName: train_5, taskId: Split table into folds, type: CSV}
          label_column:
            graphInput: {inputName: label_column}
          num_iterations:
            graphInput: {inputName: num_iterations}
          objective:
            graphInput: {inputName: objective}
      Xgboost predict 5:
        componentRef: {digest: ecdfaf32cff15b6abc3d0dd80365ce00577f1a19a058fbe201f515431cea1357, url: 'https://raw.githubusercontent.com/kubeflow/pipelines/567c04c51ff00a1ee525b3458425b17adbe3df61/components/XGBoost/Predict/component.yaml'}
        arguments:
          data:
            taskOutput: {outputName: test_5, taskId: Split table into folds, type: CSV}
          model:
            taskOutput: {outputName: model, taskId: Xgboost train 5, type: XGBoostModel}
          label_column:
            graphInput: {inputName: label_column}
      Pandas Transform DataFrame in CSV format 5:
        componentRef: {digest: 58dc88349157bf128021708c316ce4eb60bc1de0a5a7dd3af45fabac3276d510, url: 'https://raw.githubusercontent.com/kubeflow/pipelines/6162d55998b176b50267d351241100bb0ee715bc/components/pandas/Transform_DataFrame/in_CSV_format/component.yaml'}
        arguments:
          table:
            taskOutput: {outputName: test_5, taskId: Split table into folds, type: CSV}
          transform_code: df = df[["tips"]]
      Remove header 5:
        componentRef: {digest: ba35ffea863855b956c3c50aefa0420ba3823949a6c059e6e3971cde960dc5a3, url: 'https://raw.githubusercontent.com/kubeflow/pipelines/02c9638287468c849632cf9f7885b51de4c66f86/components/tables/Remove_header/component.yaml'}
        arguments:
          table:
            taskOutput: {outputName: transformed_table, taskId: Pandas Transform DataFrame in CSV format 5, type: CSV}
      Calculate regression metrics from csv 5:
        componentRef: {digest: e3ecbfeb18032820edfee4255e2fb6d15d15ed224e166519d5e528e12053a995, url: 'https://raw.githubusercontent.com/kubeflow/pipelines/7da1ac9464b4b3e7d95919faa2f1107a9635b7e4/components/ml_metrics/Calculate_regression_metrics/from_CSV/component.yaml'}
        arguments:
          true_values:
            taskOutput: {outputName: table, taskId: Remove header 5}
          predicted_values:
            taskOutput: {outputName: predictions, taskId: Xgboost predict 5, type: Text}
      Aggregate regression metrics from csv:
        componentRef: {digest: 3e128130521eff8d43764f3dcb037316cdd6490ad2878df5adef416f7c2f3c19,
|
||||
url: 'https://raw.githubusercontent.com/kubeflow/pipelines/7ea9363fe201918d419fecdc00d1275e657ff712/components/ml_metrics/Aggregate_regression_metrics/component.yaml'}
|
||||
arguments:
|
||||
metrics_1:
|
||||
taskOutput: {outputName: metrics, taskId: Calculate regression metrics
|
||||
from csv, type: JsonObject}
|
||||
metrics_2:
|
||||
taskOutput: {outputName: metrics, taskId: Calculate regression metrics
|
||||
from csv 2, type: JsonObject}
|
||||
metrics_3:
|
||||
taskOutput: {outputName: metrics, taskId: Calculate regression metrics
|
||||
from csv 3, type: JsonObject}
|
||||
metrics_4:
|
||||
taskOutput: {outputName: metrics, taskId: Calculate regression metrics
|
||||
from csv 4, type: JsonObject}
|
||||
metrics_5:
|
||||
taskOutput: {outputName: metrics, taskId: Calculate regression metrics
|
||||
from csv 5, type: JsonObject}
|
||||
outputValues:
|
||||
mean_absolute_error:
|
||||
taskOutput: {outputName: mean_absolute_error, taskId: Aggregate regression
|
||||
metrics from csv, type: Float}
|
||||
mean_squared_error:
|
||||
taskOutput: {outputName: mean_squared_error, taskId: Aggregate regression
|
||||
metrics from csv, type: Float}
|
||||
root_mean_squared_error:
|
||||
taskOutput: {outputName: root_mean_squared_error, taskId: Aggregate regression
|
||||
metrics from csv, type: Float}
|
||||
metrics:
|
||||
taskOutput: {outputName: metrics, taskId: Aggregate regression metrics from
|
||||
csv, type: JsonObject}
|
||||
|
|
@@ -1,58 +0,0 @@

from kfp.components import InputPath, OutputPath, create_component_from_func


def xgboost_predict(
    data_path: InputPath('CSV'),  # Also supports LibSVM
    model_path: InputPath('XGBoostModel'),
    predictions_path: OutputPath('Predictions'),
    label_column: int = None,
):
    '''Make predictions using a trained XGBoost model.

    Args:
        data_path: Path for the feature data in CSV format.
        model_path: Path for the trained model in binary XGBoost format.
        predictions_path: Output path for the predictions.
        label_column: Column containing the label data.

    Annotations:
        author: Alexey Volkov <alexey.volkov@ark-kun.com>
    '''
    from pathlib import Path

    import numpy
    import pandas
    import xgboost

    df = pandas.read_csv(
        data_path,
    )

    if label_column is not None:
        df = df.drop(columns=[df.columns[label_column]])

    testing_data = xgboost.DMatrix(
        data=df,
    )

    model = xgboost.Booster(model_file=model_path)

    predictions = model.predict(testing_data)

    Path(predictions_path).parent.mkdir(parents=True, exist_ok=True)
    numpy.savetxt(predictions_path, predictions)


if __name__ == '__main__':
    create_component_from_func(
        xgboost_predict,
        output_component_file='component.yaml',
        base_image='python:3.7',
        packages_to_install=[
            'xgboost==1.1.1',
            'pandas==1.0.5',
        ],
        annotations={
            "author": "Alexey Volkov <alexey.volkov@ark-kun.com>",
            "canonical_location": "https://raw.githubusercontent.com/Ark-kun/pipeline_components/master/components/XGBoost/Predict/component.yaml",
        },
    )
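For reference, a minimal sketch of how the component generated from this file can be wired together with the Train component in a KFP v1 pipeline. The pinned URLs are taken from the componentRef entries elsewhere in this commit; the pipeline function and the upstream `data` artifact are illustrative assumptions, not part of the removed file:

from kfp import components

# Component URLs pinned in this commit's componentRef entries.
xgboost_train_op = components.load_component_from_url(
    'https://raw.githubusercontent.com/kubeflow/pipelines/567c04c51ff00a1ee525b3458425b17adbe3df61/components/XGBoost/Train/component.yaml')
xgboost_predict_op = components.load_component_from_url(
    'https://raw.githubusercontent.com/kubeflow/pipelines/567c04c51ff00a1ee525b3458425b17adbe3df61/components/XGBoost/Predict/component.yaml')


def xgboost_pipeline(data):  # 'data' is an upstream CSV artifact (illustrative)
    train_task = xgboost_train_op(training_data=data, label_column=0)
    # The trained model artifact feeds straight into the predict component.
    predict_task = xgboost_predict_op(
        data=data,
        model=train_task.outputs['model'],
        label_column=0,
    )
    return predict_task.outputs['predictions']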
@@ -1,103 +0,0 @@

name: Xgboost predict
description: |-
  Make predictions using a trained XGBoost model.

  Args:
      data_path: Path for the feature data in CSV format.
      model_path: Path for the trained model in binary XGBoost format.
      predictions_path: Output path for the predictions.
      label_column: Column containing the label data.

  Annotations:
      author: Alexey Volkov <alexey.volkov@ark-kun.com>
inputs:
- {name: data, type: CSV}
- {name: model, type: XGBoostModel}
- {name: label_column, type: Integer, optional: true}
outputs:
- {name: predictions, type: Predictions}
metadata:
  annotations:
    author: Alexey Volkov <alexey.volkov@ark-kun.com>
    canonical_location: 'https://raw.githubusercontent.com/Ark-kun/pipeline_components/master/components/XGBoost/Predict/component.yaml'
implementation:
  container:
    image: python:3.7
    command:
    - sh
    - -c
    - (PIP_DISABLE_PIP_VERSION_CHECK=1 python3 -m pip install --quiet --no-warn-script-location
      'xgboost==1.1.1' 'pandas==1.0.5' || PIP_DISABLE_PIP_VERSION_CHECK=1 python3
      -m pip install --quiet --no-warn-script-location 'xgboost==1.1.1' 'pandas==1.0.5'
      --user) && "$0" "$@"
    - python3
    - -u
    - -c
    - |
      def _make_parent_dirs_and_return_path(file_path: str):
          import os
          os.makedirs(os.path.dirname(file_path), exist_ok=True)
          return file_path

      def xgboost_predict(
          data_path,  # Also supports LibSVM
          model_path,
          predictions_path,
          label_column = None,
      ):
          '''Make predictions using a trained XGBoost model.

          Args:
              data_path: Path for the feature data in CSV format.
              model_path: Path for the trained model in binary XGBoost format.
              predictions_path: Output path for the predictions.
              label_column: Column containing the label data.

          Annotations:
              author: Alexey Volkov <alexey.volkov@ark-kun.com>
          '''
          from pathlib import Path

          import numpy
          import pandas
          import xgboost

          df = pandas.read_csv(
              data_path,
          )

          if label_column is not None:
              df = df.drop(columns=[df.columns[label_column]])

          testing_data = xgboost.DMatrix(
              data=df,
          )

          model = xgboost.Booster(model_file=model_path)

          predictions = model.predict(testing_data)

          Path(predictions_path).parent.mkdir(parents=True, exist_ok=True)
          numpy.savetxt(predictions_path, predictions)

      import argparse
      _parser = argparse.ArgumentParser(prog='Xgboost predict', description='Make predictions using a trained XGBoost model.\n\n    Args:\n        data_path: Path for the feature data in CSV format.\n        model_path: Path for the trained model in binary XGBoost format.\n        predictions_path: Output path for the predictions.\n        label_column: Column containing the label data.\n\n    Annotations:\n        author: Alexey Volkov <alexey.volkov@ark-kun.com>')
      _parser.add_argument("--data", dest="data_path", type=str, required=True, default=argparse.SUPPRESS)
      _parser.add_argument("--model", dest="model_path", type=str, required=True, default=argparse.SUPPRESS)
      _parser.add_argument("--label-column", dest="label_column", type=int, required=False, default=argparse.SUPPRESS)
      _parser.add_argument("--predictions", dest="predictions_path", type=_make_parent_dirs_and_return_path, required=True, default=argparse.SUPPRESS)
      _parsed_args = vars(_parser.parse_args())

      _outputs = xgboost_predict(**_parsed_args)
    args:
    - --data
    - {inputPath: data}
    - --model
    - {inputPath: model}
    - if:
        cond: {isPresent: label_column}
        then:
        - --label-column
        - {inputValue: label_column}
    - --predictions
    - {outputPath: predictions}
@@ -1,58 +0,0 @@

from kfp.components import InputPath, OutputPath, create_component_from_func


def xgboost_predict(
    data_path: InputPath('ApacheParquet'),
    model_path: InputPath('XGBoostModel'),
    predictions_path: OutputPath('Predictions'),
    label_column_name: str = None,
):
    '''Make predictions using a trained XGBoost model.

    Args:
        data_path: Path for the feature data in Apache Parquet format.
        model_path: Path for the trained model in binary XGBoost format.
        predictions_path: Output path for the predictions.
        label_column_name: Optional. Name of the column containing the label data that is excluded during the prediction.

    Annotations:
        author: Alexey Volkov <alexey.volkov@ark-kun.com>
    '''
    from pathlib import Path

    import numpy
    import pandas
    import xgboost

    # Loading data
    df = pandas.read_parquet(data_path)
    if label_column_name:
        df = df.drop(columns=[label_column_name])

    evaluation_data = xgboost.DMatrix(
        data=df,
    )

    # Loading the model
    model = xgboost.Booster(model_file=model_path)

    predictions = model.predict(evaluation_data)

    Path(predictions_path).parent.mkdir(parents=True, exist_ok=True)
    numpy.savetxt(predictions_path, predictions)


if __name__ == '__main__':
    create_component_from_func(
        xgboost_predict,
        output_component_file='component.yaml',
        base_image='python:3.7',
        packages_to_install=[
            'xgboost==1.1.1',
            'pandas==1.0.5',
            'pyarrow==0.17.1',
        ],
        annotations={
            "author": "Alexey Volkov <alexey.volkov@ark-kun.com>",
            "canonical_location": "https://raw.githubusercontent.com/Ark-kun/pipeline_components/master/components/XGBoost/Predict/from_ApacheParquet/component.yaml",
        },
    )
@@ -1,102 +0,0 @@

name: Xgboost predict
description: |-
  Make predictions using a trained XGBoost model.

  Args:
      data_path: Path for the feature data in Apache Parquet format.
      model_path: Path for the trained model in binary XGBoost format.
      predictions_path: Output path for the predictions.
      label_column_name: Optional. Name of the column containing the label data that is excluded during the prediction.

  Annotations:
      author: Alexey Volkov <alexey.volkov@ark-kun.com>
inputs:
- {name: data, type: ApacheParquet}
- {name: model, type: XGBoostModel}
- {name: label_column_name, type: String, optional: true}
outputs:
- {name: predictions, type: Predictions}
metadata:
  annotations:
    author: Alexey Volkov <alexey.volkov@ark-kun.com>
    canonical_location: 'https://raw.githubusercontent.com/Ark-kun/pipeline_components/master/components/XGBoost/Predict/from_ApacheParquet/component.yaml'
implementation:
  container:
    image: python:3.7
    command:
    - sh
    - -c
    - (PIP_DISABLE_PIP_VERSION_CHECK=1 python3 -m pip install --quiet --no-warn-script-location
      'xgboost==1.1.1' 'pandas==1.0.5' 'pyarrow==0.17.1' || PIP_DISABLE_PIP_VERSION_CHECK=1
      python3 -m pip install --quiet --no-warn-script-location 'xgboost==1.1.1' 'pandas==1.0.5'
      'pyarrow==0.17.1' --user) && "$0" "$@"
    - python3
    - -u
    - -c
    - |
      def _make_parent_dirs_and_return_path(file_path: str):
          import os
          os.makedirs(os.path.dirname(file_path), exist_ok=True)
          return file_path

      def xgboost_predict(
          data_path,
          model_path,
          predictions_path,
          label_column_name = None,
      ):
          '''Make predictions using a trained XGBoost model.

          Args:
              data_path: Path for the feature data in Apache Parquet format.
              model_path: Path for the trained model in binary XGBoost format.
              predictions_path: Output path for the predictions.
              label_column_name: Optional. Name of the column containing the label data that is excluded during the prediction.

          Annotations:
              author: Alexey Volkov <alexey.volkov@ark-kun.com>
          '''
          from pathlib import Path

          import numpy
          import pandas
          import xgboost

          # Loading data
          df = pandas.read_parquet(data_path)
          if label_column_name:
              df = df.drop(columns=[label_column_name])

          evaluation_data = xgboost.DMatrix(
              data=df,
          )

          # Loading the model
          model = xgboost.Booster(model_file=model_path)

          predictions = model.predict(evaluation_data)

          Path(predictions_path).parent.mkdir(parents=True, exist_ok=True)
          numpy.savetxt(predictions_path, predictions)

      import argparse
      _parser = argparse.ArgumentParser(prog='Xgboost predict', description='Make predictions using a trained XGBoost model.\n\n    Args:\n        data_path: Path for the feature data in Apache Parquet format.\n        model_path: Path for the trained model in binary XGBoost format.\n        predictions_path: Output path for the predictions.\n        label_column_name: Optional. Name of the column containing the label data that is excluded during the prediction.\n\n    Annotations:\n        author: Alexey Volkov <alexey.volkov@ark-kun.com>')
      _parser.add_argument("--data", dest="data_path", type=str, required=True, default=argparse.SUPPRESS)
      _parser.add_argument("--model", dest="model_path", type=str, required=True, default=argparse.SUPPRESS)
      _parser.add_argument("--label-column-name", dest="label_column_name", type=str, required=False, default=argparse.SUPPRESS)
      _parser.add_argument("--predictions", dest="predictions_path", type=_make_parent_dirs_and_return_path, required=True, default=argparse.SUPPRESS)
      _parsed_args = vars(_parser.parse_args())

      _outputs = xgboost_predict(**_parsed_args)
    args:
    - --data
    - {inputPath: data}
    - --model
    - {inputPath: model}
    - if:
        cond: {isPresent: label_column_name}
        then:
        - --label-column-name
        - {inputValue: label_column_name}
    - --predictions
    - {outputPath: predictions}
@@ -1,3 +0,0 @@

# Deprecation Warning

The components in this directory have been moved to [components/contrib/XGBoost](https://github.com/kubeflow/pipelines/tree/master/components/contrib/XGBoost). This directory will be removed by the end of 2021.
@@ -1,94 +0,0 @@

from kfp.components import InputPath, OutputPath, create_component_from_func


def xgboost_train(
    training_data_path: InputPath('CSV'),  # Also supports LibSVM
    model_path: OutputPath('XGBoostModel'),
    model_config_path: OutputPath('XGBoostModelConfig'),
    starting_model_path: InputPath('XGBoostModel') = None,

    label_column: int = 0,
    num_iterations: int = 10,
    booster_params: dict = None,

    # Booster parameters
    objective: str = 'reg:squarederror',
    booster: str = 'gbtree',
    learning_rate: float = 0.3,
    min_split_loss: float = 0,
    max_depth: int = 6,
):
    '''Train an XGBoost model.

    Args:
        training_data_path: Path for the training data in CSV format.
        model_path: Output path for the trained model in binary XGBoost format.
        model_config_path: Output path for the internal parameter configuration of Booster as a JSON string.
        starting_model_path: Path for the existing trained model to start from.
        label_column: Column containing the label data.
        num_boost_rounds: Number of boosting iterations.
        booster_params: Parameters for the booster. See https://xgboost.readthedocs.io/en/latest/parameter.html
        objective: The learning task and the corresponding learning objective.
            See https://xgboost.readthedocs.io/en/latest/parameter.html#learning-task-parameters
            The most common values are:
            "reg:squarederror" - Regression with squared loss (default).
            "reg:logistic" - Logistic regression.
            "binary:logistic" - Logistic regression for binary classification, output probability.
            "binary:logitraw" - Logistic regression for binary classification, output score before logistic transformation
            "rank:pairwise" - Use LambdaMART to perform pairwise ranking where the pairwise loss is minimized
            "rank:ndcg" - Use LambdaMART to perform list-wise ranking where Normalized Discounted Cumulative Gain (NDCG) is maximized

    Annotations:
        author: Alexey Volkov <alexey.volkov@ark-kun.com>
    '''
    import pandas
    import xgboost

    df = pandas.read_csv(
        training_data_path,
    )

    training_data = xgboost.DMatrix(
        data=df.drop(columns=[df.columns[label_column]]),
        label=df[df.columns[label_column]],
    )

    booster_params = booster_params or {}
    booster_params.setdefault('objective', objective)
    booster_params.setdefault('booster', booster)
    booster_params.setdefault('learning_rate', learning_rate)
    booster_params.setdefault('min_split_loss', min_split_loss)
    booster_params.setdefault('max_depth', max_depth)

    starting_model = None
    if starting_model_path:
        starting_model = xgboost.Booster(model_file=starting_model_path)

    model = xgboost.train(
        params=booster_params,
        dtrain=training_data,
        num_boost_round=num_iterations,
        xgb_model=starting_model
    )

    # Saving the model in binary format
    model.save_model(model_path)

    model_config_str = model.save_config()
    with open(model_config_path, 'w') as model_config_file:
        model_config_file.write(model_config_str)


if __name__ == '__main__':
    create_component_from_func(
        xgboost_train,
        output_component_file='component.yaml',
        base_image='python:3.7',
        packages_to_install=[
            'xgboost==1.1.1',
            'pandas==1.0.5',
        ],
        annotations={
            "author": "Alexey Volkov <alexey.volkov@ark-kun.com>",
            "canonical_location": "https://raw.githubusercontent.com/Ark-kun/pipeline_components/master/components/XGBoost/Train/component.yaml",
        },
    )
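One detail worth noting in the function above: because it uses setdefault, any key the caller supplies in booster_params takes precedence over the corresponding keyword argument. A minimal sketch of that merging behavior:

# booster_params entries win; missing keys fall back to the individual
# keyword arguments (shown here with their defaults).
booster_params = {'max_depth': 3}
booster_params.setdefault('objective', 'reg:squarederror')  # added
booster_params.setdefault('max_depth', 6)                   # ignored, stays 3
assert booster_params == {'max_depth': 3, 'objective': 'reg:squarederror'}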
@@ -1,208 +0,0 @@

name: Xgboost train
description: |-
  Train an XGBoost model.

  Args:
      training_data_path: Path for the training data in CSV format.
      model_path: Output path for the trained model in binary XGBoost format.
      model_config_path: Output path for the internal parameter configuration of Booster as a JSON string.
      starting_model_path: Path for the existing trained model to start from.
      label_column: Column containing the label data.
      num_boost_rounds: Number of boosting iterations.
      booster_params: Parameters for the booster. See https://xgboost.readthedocs.io/en/latest/parameter.html
      objective: The learning task and the corresponding learning objective.
          See https://xgboost.readthedocs.io/en/latest/parameter.html#learning-task-parameters
          The most common values are:
          "reg:squarederror" - Regression with squared loss (default).
          "reg:logistic" - Logistic regression.
          "binary:logistic" - Logistic regression for binary classification, output probability.
          "binary:logitraw" - Logistic regression for binary classification, output score before logistic transformation
          "rank:pairwise" - Use LambdaMART to perform pairwise ranking where the pairwise loss is minimized
          "rank:ndcg" - Use LambdaMART to perform list-wise ranking where Normalized Discounted Cumulative Gain (NDCG) is maximized

  Annotations:
      author: Alexey Volkov <alexey.volkov@ark-kun.com>
inputs:
- {name: training_data, type: CSV}
- {name: starting_model, type: XGBoostModel, optional: true}
- {name: label_column, type: Integer, default: '0', optional: true}
- {name: num_iterations, type: Integer, default: '10', optional: true}
- {name: booster_params, type: JsonObject, optional: true}
- {name: objective, type: String, default: 'reg:squarederror', optional: true}
- {name: booster, type: String, default: gbtree, optional: true}
- {name: learning_rate, type: Float, default: '0.3', optional: true}
- {name: min_split_loss, type: Float, default: '0', optional: true}
- {name: max_depth, type: Integer, default: '6', optional: true}
outputs:
- {name: model, type: XGBoostModel}
- {name: model_config, type: XGBoostModelConfig}
metadata:
  annotations:
    author: Alexey Volkov <alexey.volkov@ark-kun.com>
    canonical_location: 'https://raw.githubusercontent.com/Ark-kun/pipeline_components/master/components/XGBoost/Train/component.yaml'
implementation:
  container:
    image: python:3.7
    command:
    - sh
    - -c
    - (PIP_DISABLE_PIP_VERSION_CHECK=1 python3 -m pip install --quiet --no-warn-script-location
      'xgboost==1.1.1' 'pandas==1.0.5' || PIP_DISABLE_PIP_VERSION_CHECK=1 python3
      -m pip install --quiet --no-warn-script-location 'xgboost==1.1.1' 'pandas==1.0.5'
      --user) && "$0" "$@"
    - python3
    - -u
    - -c
    - |
      def _make_parent_dirs_and_return_path(file_path: str):
          import os
          os.makedirs(os.path.dirname(file_path), exist_ok=True)
          return file_path

      def xgboost_train(
          training_data_path,  # Also supports LibSVM
          model_path,
          model_config_path,
          starting_model_path = None,

          label_column = 0,
          num_iterations = 10,
          booster_params = None,

          # Booster parameters
          objective = 'reg:squarederror',
          booster = 'gbtree',
          learning_rate = 0.3,
          min_split_loss = 0,
          max_depth = 6,
      ):
          '''Train an XGBoost model.

          Args:
              training_data_path: Path for the training data in CSV format.
              model_path: Output path for the trained model in binary XGBoost format.
              model_config_path: Output path for the internal parameter configuration of Booster as a JSON string.
              starting_model_path: Path for the existing trained model to start from.
              label_column: Column containing the label data.
              num_boost_rounds: Number of boosting iterations.
              booster_params: Parameters for the booster. See https://xgboost.readthedocs.io/en/latest/parameter.html
              objective: The learning task and the corresponding learning objective.
                  See https://xgboost.readthedocs.io/en/latest/parameter.html#learning-task-parameters
                  The most common values are:
                  "reg:squarederror" - Regression with squared loss (default).
                  "reg:logistic" - Logistic regression.
                  "binary:logistic" - Logistic regression for binary classification, output probability.
                  "binary:logitraw" - Logistic regression for binary classification, output score before logistic transformation
                  "rank:pairwise" - Use LambdaMART to perform pairwise ranking where the pairwise loss is minimized
                  "rank:ndcg" - Use LambdaMART to perform list-wise ranking where Normalized Discounted Cumulative Gain (NDCG) is maximized

          Annotations:
              author: Alexey Volkov <alexey.volkov@ark-kun.com>
          '''
          import pandas
          import xgboost

          df = pandas.read_csv(
              training_data_path,
          )

          training_data = xgboost.DMatrix(
              data=df.drop(columns=[df.columns[label_column]]),
              label=df[df.columns[label_column]],
          )

          booster_params = booster_params or {}
          booster_params.setdefault('objective', objective)
          booster_params.setdefault('booster', booster)
          booster_params.setdefault('learning_rate', learning_rate)
          booster_params.setdefault('min_split_loss', min_split_loss)
          booster_params.setdefault('max_depth', max_depth)

          starting_model = None
          if starting_model_path:
              starting_model = xgboost.Booster(model_file=starting_model_path)

          model = xgboost.train(
              params=booster_params,
              dtrain=training_data,
              num_boost_round=num_iterations,
              xgb_model=starting_model
          )

          # Saving the model in binary format
          model.save_model(model_path)

          model_config_str = model.save_config()
          with open(model_config_path, 'w') as model_config_file:
              model_config_file.write(model_config_str)

      import json
      import argparse
      _parser = argparse.ArgumentParser(prog='Xgboost train', description='Train an XGBoost model.\n\n    Args:\n        training_data_path: Path for the training data in CSV format.\n        model_path: Output path for the trained model in binary XGBoost format.\n        model_config_path: Output path for the internal parameter configuration of Booster as a JSON string.\n        starting_model_path: Path for the existing trained model to start from.\n        label_column: Column containing the label data.\n        num_boost_rounds: Number of boosting iterations.\n        booster_params: Parameters for the booster. See https://xgboost.readthedocs.io/en/latest/parameter.html\n        objective: The learning task and the corresponding learning objective.\n            See https://xgboost.readthedocs.io/en/latest/parameter.html#learning-task-parameters\n            The most common values are:\n            "reg:squarederror" - Regression with squared loss (default).\n            "reg:logistic" - Logistic regression.\n            "binary:logistic" - Logistic regression for binary classification, output probability.\n            "binary:logitraw" - Logistic regression for binary classification, output score before logistic transformation\n            "rank:pairwise" - Use LambdaMART to perform pairwise ranking where the pairwise loss is minimized\n            "rank:ndcg" - Use LambdaMART to perform list-wise ranking where Normalized Discounted Cumulative Gain (NDCG) is maximized\n\n    Annotations:\n        author: Alexey Volkov <alexey.volkov@ark-kun.com>')
      _parser.add_argument("--training-data", dest="training_data_path", type=str, required=True, default=argparse.SUPPRESS)
      _parser.add_argument("--starting-model", dest="starting_model_path", type=str, required=False, default=argparse.SUPPRESS)
      _parser.add_argument("--label-column", dest="label_column", type=int, required=False, default=argparse.SUPPRESS)
      _parser.add_argument("--num-iterations", dest="num_iterations", type=int, required=False, default=argparse.SUPPRESS)
      _parser.add_argument("--booster-params", dest="booster_params", type=json.loads, required=False, default=argparse.SUPPRESS)
      _parser.add_argument("--objective", dest="objective", type=str, required=False, default=argparse.SUPPRESS)
      _parser.add_argument("--booster", dest="booster", type=str, required=False, default=argparse.SUPPRESS)
      _parser.add_argument("--learning-rate", dest="learning_rate", type=float, required=False, default=argparse.SUPPRESS)
      _parser.add_argument("--min-split-loss", dest="min_split_loss", type=float, required=False, default=argparse.SUPPRESS)
      _parser.add_argument("--max-depth", dest="max_depth", type=int, required=False, default=argparse.SUPPRESS)
      _parser.add_argument("--model", dest="model_path", type=_make_parent_dirs_and_return_path, required=True, default=argparse.SUPPRESS)
      _parser.add_argument("--model-config", dest="model_config_path", type=_make_parent_dirs_and_return_path, required=True, default=argparse.SUPPRESS)
      _parsed_args = vars(_parser.parse_args())

      _outputs = xgboost_train(**_parsed_args)
    args:
    - --training-data
    - {inputPath: training_data}
    - if:
        cond: {isPresent: starting_model}
        then:
        - --starting-model
        - {inputPath: starting_model}
    - if:
        cond: {isPresent: label_column}
        then:
        - --label-column
        - {inputValue: label_column}
    - if:
        cond: {isPresent: num_iterations}
        then:
        - --num-iterations
        - {inputValue: num_iterations}
    - if:
        cond: {isPresent: booster_params}
        then:
        - --booster-params
        - {inputValue: booster_params}
    - if:
        cond: {isPresent: objective}
        then:
        - --objective
        - {inputValue: objective}
    - if:
        cond: {isPresent: booster}
        then:
        - --booster
        - {inputValue: booster}
    - if:
        cond: {isPresent: learning_rate}
        then:
        - --learning-rate
        - {inputValue: learning_rate}
    - if:
        cond: {isPresent: min_split_loss}
        then:
        - --min-split-loss
        - {inputValue: min_split_loss}
    - if:
        cond: {isPresent: max_depth}
        then:
        - --max-depth
        - {inputValue: max_depth}
    - --model
    - {outputPath: model}
    - --model-config
    - {outputPath: model_config}
@@ -1,94 +0,0 @@

from kfp.components import InputPath, OutputPath, create_component_from_func


def xgboost_train(
    training_data_path: InputPath('ApacheParquet'),
    model_path: OutputPath('XGBoostModel'),
    model_config_path: OutputPath('XGBoostModelConfig'),
    label_column_name: str,

    starting_model_path: InputPath('XGBoostModel') = None,

    num_iterations: int = 10,
    booster_params: dict = None,

    # Booster parameters
    objective: str = 'reg:squarederror',
    booster: str = 'gbtree',
    learning_rate: float = 0.3,
    min_split_loss: float = 0,
    max_depth: int = 6,
):
    '''Train an XGBoost model.

    Args:
        training_data_path: Path for the training data in Apache Parquet format.
        model_path: Output path for the trained model in binary XGBoost format.
        model_config_path: Output path for the internal parameter configuration of Booster as a JSON string.
        starting_model_path: Path for the existing trained model to start from.
        label_column_name: Name of the column containing the label data.
        num_boost_rounds: Number of boosting iterations.
        booster_params: Parameters for the booster. See https://xgboost.readthedocs.io/en/latest/parameter.html
        objective: The learning task and the corresponding learning objective.
            See https://xgboost.readthedocs.io/en/latest/parameter.html#learning-task-parameters
            The most common values are:
            "reg:squarederror" - Regression with squared loss (default).
            "reg:logistic" - Logistic regression.
            "binary:logistic" - Logistic regression for binary classification, output probability.
            "binary:logitraw" - Logistic regression for binary classification, output score before logistic transformation
            "rank:pairwise" - Use LambdaMART to perform pairwise ranking where the pairwise loss is minimized
            "rank:ndcg" - Use LambdaMART to perform list-wise ranking where Normalized Discounted Cumulative Gain (NDCG) is maximized

    Annotations:
        author: Alexey Volkov <alexey.volkov@ark-kun.com>
    '''
    import pandas
    import xgboost

    # Loading data
    df = pandas.read_parquet(training_data_path)
    training_data = xgboost.DMatrix(
        data=df.drop(columns=[label_column_name]),
        label=df[[label_column_name]],
    )
    # Training
    booster_params = booster_params or {}
    booster_params.setdefault('objective', objective)
    booster_params.setdefault('booster', booster)
    booster_params.setdefault('learning_rate', learning_rate)
    booster_params.setdefault('min_split_loss', min_split_loss)
    booster_params.setdefault('max_depth', max_depth)

    starting_model = None
    if starting_model_path:
        starting_model = xgboost.Booster(model_file=starting_model_path)

    model = xgboost.train(
        params=booster_params,
        dtrain=training_data,
        num_boost_round=num_iterations,
        xgb_model=starting_model
    )

    # Saving the model in binary format
    model.save_model(model_path)

    model_config_str = model.save_config()
    with open(model_config_path, 'w') as model_config_file:
        model_config_file.write(model_config_str)


if __name__ == '__main__':
    create_component_from_func(
        xgboost_train,
        output_component_file='component.yaml',
        base_image='python:3.7',
        packages_to_install=[
            'xgboost==1.1.1',
            'pandas==1.0.5',
            'pyarrow==0.17.1',
        ],
        annotations={
            "author": "Alexey Volkov <alexey.volkov@ark-kun.com>",
            "canonical_location": "https://raw.githubusercontent.com/Ark-kun/pipeline_components/master/components/XGBoost/Train/from_ApacheParquet/component.yaml",
        },
    )
@@ -1,204 +0,0 @@

name: Xgboost train
description: |-
  Train an XGBoost model.

  Args:
      training_data_path: Path for the training data in Apache Parquet format.
      model_path: Output path for the trained model in binary XGBoost format.
      model_config_path: Output path for the internal parameter configuration of Booster as a JSON string.
      starting_model_path: Path for the existing trained model to start from.
      label_column_name: Name of the column containing the label data.
      num_boost_rounds: Number of boosting iterations.
      booster_params: Parameters for the booster. See https://xgboost.readthedocs.io/en/latest/parameter.html
      objective: The learning task and the corresponding learning objective.
          See https://xgboost.readthedocs.io/en/latest/parameter.html#learning-task-parameters
          The most common values are:
          "reg:squarederror" - Regression with squared loss (default).
          "reg:logistic" - Logistic regression.
          "binary:logistic" - Logistic regression for binary classification, output probability.
          "binary:logitraw" - Logistic regression for binary classification, output score before logistic transformation
          "rank:pairwise" - Use LambdaMART to perform pairwise ranking where the pairwise loss is minimized
          "rank:ndcg" - Use LambdaMART to perform list-wise ranking where Normalized Discounted Cumulative Gain (NDCG) is maximized

  Annotations:
      author: Alexey Volkov <alexey.volkov@ark-kun.com>
inputs:
- {name: training_data, type: ApacheParquet}
- {name: label_column_name, type: String}
- {name: starting_model, type: XGBoostModel, optional: true}
- {name: num_iterations, type: Integer, default: '10', optional: true}
- {name: booster_params, type: JsonObject, optional: true}
- {name: objective, type: String, default: 'reg:squarederror', optional: true}
- {name: booster, type: String, default: gbtree, optional: true}
- {name: learning_rate, type: Float, default: '0.3', optional: true}
- {name: min_split_loss, type: Float, default: '0', optional: true}
- {name: max_depth, type: Integer, default: '6', optional: true}
outputs:
- {name: model, type: XGBoostModel}
- {name: model_config, type: XGBoostModelConfig}
metadata:
  annotations:
    author: Alexey Volkov <alexey.volkov@ark-kun.com>
    canonical_location: 'https://raw.githubusercontent.com/Ark-kun/pipeline_components/master/components/XGBoost/Train/from_ApacheParquet/component.yaml'
implementation:
  container:
    image: python:3.7
    command:
    - sh
    - -c
    - (PIP_DISABLE_PIP_VERSION_CHECK=1 python3 -m pip install --quiet --no-warn-script-location
      'xgboost==1.1.1' 'pandas==1.0.5' 'pyarrow==0.17.1' || PIP_DISABLE_PIP_VERSION_CHECK=1
      python3 -m pip install --quiet --no-warn-script-location 'xgboost==1.1.1' 'pandas==1.0.5'
      'pyarrow==0.17.1' --user) && "$0" "$@"
    - python3
    - -u
    - -c
    - |
      def _make_parent_dirs_and_return_path(file_path: str):
          import os
          os.makedirs(os.path.dirname(file_path), exist_ok=True)
          return file_path

      def xgboost_train(
          training_data_path,
          model_path,
          model_config_path,
          label_column_name,

          starting_model_path = None,

          num_iterations = 10,
          booster_params = None,

          # Booster parameters
          objective = 'reg:squarederror',
          booster = 'gbtree',
          learning_rate = 0.3,
          min_split_loss = 0,
          max_depth = 6,
      ):
          '''Train an XGBoost model.

          Args:
              training_data_path: Path for the training data in Apache Parquet format.
              model_path: Output path for the trained model in binary XGBoost format.
              model_config_path: Output path for the internal parameter configuration of Booster as a JSON string.
              starting_model_path: Path for the existing trained model to start from.
              label_column_name: Name of the column containing the label data.
              num_boost_rounds: Number of boosting iterations.
              booster_params: Parameters for the booster. See https://xgboost.readthedocs.io/en/latest/parameter.html
              objective: The learning task and the corresponding learning objective.
                  See https://xgboost.readthedocs.io/en/latest/parameter.html#learning-task-parameters
                  The most common values are:
                  "reg:squarederror" - Regression with squared loss (default).
                  "reg:logistic" - Logistic regression.
                  "binary:logistic" - Logistic regression for binary classification, output probability.
                  "binary:logitraw" - Logistic regression for binary classification, output score before logistic transformation
                  "rank:pairwise" - Use LambdaMART to perform pairwise ranking where the pairwise loss is minimized
                  "rank:ndcg" - Use LambdaMART to perform list-wise ranking where Normalized Discounted Cumulative Gain (NDCG) is maximized

          Annotations:
              author: Alexey Volkov <alexey.volkov@ark-kun.com>
          '''
          import pandas
          import xgboost

          # Loading data
          df = pandas.read_parquet(training_data_path)
          training_data = xgboost.DMatrix(
              data=df.drop(columns=[label_column_name]),
              label=df[[label_column_name]],
          )
          # Training
          booster_params = booster_params or {}
          booster_params.setdefault('objective', objective)
          booster_params.setdefault('booster', booster)
          booster_params.setdefault('learning_rate', learning_rate)
          booster_params.setdefault('min_split_loss', min_split_loss)
          booster_params.setdefault('max_depth', max_depth)

          starting_model = None
          if starting_model_path:
              starting_model = xgboost.Booster(model_file=starting_model_path)

          model = xgboost.train(
              params=booster_params,
              dtrain=training_data,
              num_boost_round=num_iterations,
              xgb_model=starting_model
          )

          # Saving the model in binary format
          model.save_model(model_path)

          model_config_str = model.save_config()
          with open(model_config_path, 'w') as model_config_file:
              model_config_file.write(model_config_str)

      import json
      import argparse
      _parser = argparse.ArgumentParser(prog='Xgboost train', description='Train an XGBoost model.\n\n    Args:\n        training_data_path: Path for the training data in Apache Parquet format.\n        model_path: Output path for the trained model in binary XGBoost format.\n        model_config_path: Output path for the internal parameter configuration of Booster as a JSON string.\n        starting_model_path: Path for the existing trained model to start from.\n        label_column_name: Name of the column containing the label data.\n        num_boost_rounds: Number of boosting iterations.\n        booster_params: Parameters for the booster. See https://xgboost.readthedocs.io/en/latest/parameter.html\n        objective: The learning task and the corresponding learning objective.\n            See https://xgboost.readthedocs.io/en/latest/parameter.html#learning-task-parameters\n            The most common values are:\n            "reg:squarederror" - Regression with squared loss (default).\n            "reg:logistic" - Logistic regression.\n            "binary:logistic" - Logistic regression for binary classification, output probability.\n            "binary:logitraw" - Logistic regression for binary classification, output score before logistic transformation\n            "rank:pairwise" - Use LambdaMART to perform pairwise ranking where the pairwise loss is minimized\n            "rank:ndcg" - Use LambdaMART to perform list-wise ranking where Normalized Discounted Cumulative Gain (NDCG) is maximized\n\n    Annotations:\n        author: Alexey Volkov <alexey.volkov@ark-kun.com>')
      _parser.add_argument("--training-data", dest="training_data_path", type=str, required=True, default=argparse.SUPPRESS)
      _parser.add_argument("--label-column-name", dest="label_column_name", type=str, required=True, default=argparse.SUPPRESS)
      _parser.add_argument("--starting-model", dest="starting_model_path", type=str, required=False, default=argparse.SUPPRESS)
      _parser.add_argument("--num-iterations", dest="num_iterations", type=int, required=False, default=argparse.SUPPRESS)
      _parser.add_argument("--booster-params", dest="booster_params", type=json.loads, required=False, default=argparse.SUPPRESS)
      _parser.add_argument("--objective", dest="objective", type=str, required=False, default=argparse.SUPPRESS)
      _parser.add_argument("--booster", dest="booster", type=str, required=False, default=argparse.SUPPRESS)
      _parser.add_argument("--learning-rate", dest="learning_rate", type=float, required=False, default=argparse.SUPPRESS)
      _parser.add_argument("--min-split-loss", dest="min_split_loss", type=float, required=False, default=argparse.SUPPRESS)
      _parser.add_argument("--max-depth", dest="max_depth", type=int, required=False, default=argparse.SUPPRESS)
      _parser.add_argument("--model", dest="model_path", type=_make_parent_dirs_and_return_path, required=True, default=argparse.SUPPRESS)
      _parser.add_argument("--model-config", dest="model_config_path", type=_make_parent_dirs_and_return_path, required=True, default=argparse.SUPPRESS)
      _parsed_args = vars(_parser.parse_args())

      _outputs = xgboost_train(**_parsed_args)
    args:
    - --training-data
    - {inputPath: training_data}
    - --label-column-name
    - {inputValue: label_column_name}
    - if:
        cond: {isPresent: starting_model}
        then:
        - --starting-model
        - {inputPath: starting_model}
    - if:
        cond: {isPresent: num_iterations}
        then:
        - --num-iterations
        - {inputValue: num_iterations}
    - if:
        cond: {isPresent: booster_params}
        then:
        - --booster-params
        - {inputValue: booster_params}
    - if:
        cond: {isPresent: objective}
        then:
        - --objective
        - {inputValue: objective}
    - if:
        cond: {isPresent: booster}
        then:
        - --booster
        - {inputValue: booster}
    - if:
        cond: {isPresent: learning_rate}
        then:
        - --learning-rate
        - {inputValue: learning_rate}
    - if:
        cond: {isPresent: min_split_loss}
        then:
        - --min-split-loss
        - {inputValue: min_split_loss}
    - if:
        cond: {isPresent: max_depth}
        then:
        - --max-depth
        - {inputValue: max_depth}
    - --model
    - {outputPath: model}
    - --model-config
    - {outputPath: model_config}
@@ -1,53 +0,0 @@

from collections import OrderedDict
from kfp import components


xgboost_train_regression_and_calculate_metrics_on_csv_op = components.load_component_from_url('https://raw.githubusercontent.com/kubeflow/pipelines/18e8974288885086b2fd5351f6333210cd237d1b/components/XGBoost/Train_regression_and_calculate_metrics/from_CSV/component.yaml')
xgboost_5_fold_cross_validation_for_regression_op = components.load_component_from_url('https://raw.githubusercontent.com/kubeflow/pipelines/271593e4193e2d3e44bdf42269fc03f0fcd2e5e8/components/XGBoost/Cross_validation_for_regression/from_CSV/component.yaml')


def xgboost_train_and_cv_regression_on_csv(
    data: 'CSV',
    label_column: int = 0,
    objective: str = 'reg:squarederror',
    num_iterations: int = 200,
):
    main_training_and_metrics_task = xgboost_train_regression_and_calculate_metrics_on_csv_op(
        training_data=data,
        testing_data=data,
        label_column=label_column,
        objective=objective,
        num_iterations=num_iterations,
    )

    cv_training_and_metrics_task = xgboost_5_fold_cross_validation_for_regression_op(
        data=data,
        label_column=label_column,
        objective=objective,
        num_iterations=num_iterations,
    )

    return OrderedDict([
        ('model', main_training_and_metrics_task.outputs['model']),

        ('training_mean_absolute_error', main_training_and_metrics_task.outputs['mean_absolute_error']),
        ('training_mean_squared_error', main_training_and_metrics_task.outputs['mean_squared_error']),
        ('training_root_mean_squared_error', main_training_and_metrics_task.outputs['root_mean_squared_error']),
        ('training_metrics', main_training_and_metrics_task.outputs['metrics']),

        ('cv_mean_absolute_error', cv_training_and_metrics_task.outputs['mean_absolute_error']),
        ('cv_mean_squared_error', cv_training_and_metrics_task.outputs['mean_squared_error']),
        ('cv_root_mean_squared_error', cv_training_and_metrics_task.outputs['root_mean_squared_error']),
        ('cv_metrics', cv_training_and_metrics_task.outputs['metrics']),
    ])


if __name__ == '__main__':
    xgboost_train_and_cv_regression_on_csv_op = components.create_graph_component_from_pipeline_func(
        xgboost_train_and_cv_regression_on_csv,
        output_component_file='component.yaml',
        annotations={
            "author": "Alexey Volkov <alexey.volkov@ark-kun.com>",
            "canonical_location": "https://raw.githubusercontent.com/Ark-kun/pipeline_components/master/components/XGBoost/Train_and_cross-validate_regression/from_CSV/component.yaml",
        },
    )
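The create_graph_component_from_pipeline_func call above emits the graph component spec that follows. A minimal sketch of loading and invoking that spec, assuming the generated component.yaml and an upstream CSV artifact (both names are illustrative, not part of the removed file):

from kfp import components

train_and_cv_op = components.load_component_from_file('component.yaml')


def pipeline(csv_data):  # 'csv_data' is an upstream CSV artifact (illustrative)
    task = train_and_cv_op(
        data=csv_data,
        label_column=0,
        objective='reg:squarederror',
        num_iterations=200,
    )
    # task.outputs exposes 'model', the training_* metrics and the cv_* metrics.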
@@ -1,339 +0,0 @@

name: Xgboost train and cv regression on csv
inputs:
- {name: data, type: CSV}
- {name: label_column, type: Integer, default: '0', optional: true}
- {name: objective, type: String, default: 'reg:squarederror', optional: true}
- {name: num_iterations, type: Integer, default: '200', optional: true}
outputs:
- {name: model, type: XGBoostModel}
- {name: training_mean_absolute_error, type: Float}
- {name: training_mean_squared_error, type: Float}
- {name: training_root_mean_squared_error, type: Float}
- {name: training_metrics, type: JsonObject}
- {name: cv_mean_absolute_error, type: Float}
- {name: cv_mean_squared_error, type: Float}
- {name: cv_root_mean_squared_error, type: Float}
- {name: cv_metrics, type: JsonObject}
metadata:
  annotations:
    author: Alexey Volkov <alexey.volkov@ark-kun.com>
    canonical_location: 'https://raw.githubusercontent.com/Ark-kun/pipeline_components/master/components/XGBoost/Train_and_cross-validate_regression/from_CSV/component.yaml'
implementation:
  graph:
    tasks:
      Xgboost train:
        componentRef: {digest: 09b80053da29f8f51575b42e5d2e8ad4b7bdcc92a02c3744e189b1f597006b38,
          url: 'https://raw.githubusercontent.com/kubeflow/pipelines/567c04c51ff00a1ee525b3458425b17adbe3df61/components/XGBoost/Train/component.yaml'}
        arguments:
          training_data:
            graphInput: {inputName: data}
          label_column:
            graphInput: {inputName: label_column}
          num_iterations:
            graphInput: {inputName: num_iterations}
          objective:
            graphInput: {inputName: objective}
      Xgboost predict:
        componentRef: {digest: ecdfaf32cff15b6abc3d0dd80365ce00577f1a19a058fbe201f515431cea1357,
          url: 'https://raw.githubusercontent.com/kubeflow/pipelines/567c04c51ff00a1ee525b3458425b17adbe3df61/components/XGBoost/Predict/component.yaml'}
        arguments:
          data:
            graphInput: {inputName: data}
          model:
            taskOutput: {outputName: model, taskId: Xgboost train, type: XGBoostModel}
          label_column:
            graphInput: {inputName: label_column}
      Pandas Transform DataFrame in CSV format:
        componentRef: {digest: 58dc88349157bf128021708c316ce4eb60bc1de0a5a7dd3af45fabac3276d510,
          url: 'https://raw.githubusercontent.com/kubeflow/pipelines/6162d55998b176b50267d351241100bb0ee715bc/components/pandas/Transform_DataFrame/in_CSV_format/component.yaml'}
        arguments:
          table:
            graphInput: {inputName: data}
          transform_code: df = df[["tips"]]
      Remove header:
        componentRef: {digest: ba35ffea863855b956c3c50aefa0420ba3823949a6c059e6e3971cde960dc5a3,
          url: 'https://raw.githubusercontent.com/kubeflow/pipelines/02c9638287468c849632cf9f7885b51de4c66f86/components/tables/Remove_header/component.yaml'}
        arguments:
          table:
            taskOutput: {outputName: transformed_table, taskId: Pandas Transform DataFrame in CSV format, type: CSV}
      Calculate regression metrics from csv:
        componentRef: {digest: e3ecbfeb18032820edfee4255e2fb6d15d15ed224e166519d5e528e12053a995,
          url: 'https://raw.githubusercontent.com/kubeflow/pipelines/7da1ac9464b4b3e7d95919faa2f1107a9635b7e4/components/ml_metrics/Calculate_regression_metrics/from_CSV/component.yaml'}
        arguments:
          true_values:
            taskOutput: {outputName: table, taskId: Remove header}
          predicted_values:
            taskOutput: {outputName: predictions, taskId: Xgboost predict, type: Text}
      Split table into folds:
        componentRef: {digest: 9956223bcecc7294ca1afac39b60ada4a935a571d817c3dfbf2ea4a211afe3d1,
          url: 'https://raw.githubusercontent.com/kubeflow/pipelines/e9b4b29b22a5120daf95b581b0392cd461a906f0/components/dataset_manipulation/split_data_into_folds/in_CSV/component.yaml'}
        arguments:
          table:
            graphInput: {inputName: data}
      Pandas Transform DataFrame in CSV format 2:
        componentRef: {digest: 58dc88349157bf128021708c316ce4eb60bc1de0a5a7dd3af45fabac3276d510,
          url: 'https://raw.githubusercontent.com/kubeflow/pipelines/6162d55998b176b50267d351241100bb0ee715bc/components/pandas/Transform_DataFrame/in_CSV_format/component.yaml'}
        arguments:
          table:
            taskOutput: {outputName: test_3, taskId: Split table into folds, type: CSV}
          transform_code: df = df[["tips"]]
      Remove header 2:
        componentRef: {digest: ba35ffea863855b956c3c50aefa0420ba3823949a6c059e6e3971cde960dc5a3,
          url: 'https://raw.githubusercontent.com/kubeflow/pipelines/02c9638287468c849632cf9f7885b51de4c66f86/components/tables/Remove_header/component.yaml'}
        arguments:
          table:
            taskOutput: {outputName: transformed_table, taskId: Pandas Transform DataFrame in CSV format 2, type: CSV}
      Xgboost train 2:
        componentRef: {digest: 09b80053da29f8f51575b42e5d2e8ad4b7bdcc92a02c3744e189b1f597006b38,
          url: 'https://raw.githubusercontent.com/kubeflow/pipelines/567c04c51ff00a1ee525b3458425b17adbe3df61/components/XGBoost/Train/component.yaml'}
        arguments:
          training_data:
            taskOutput: {outputName: train_1, taskId: Split table into folds, type: CSV}
          label_column:
            graphInput: {inputName: label_column}
          num_iterations:
            graphInput: {inputName: num_iterations}
          objective:
            graphInput: {inputName: objective}
      Xgboost predict 2:
        componentRef: {digest: ecdfaf32cff15b6abc3d0dd80365ce00577f1a19a058fbe201f515431cea1357,
          url: 'https://raw.githubusercontent.com/kubeflow/pipelines/567c04c51ff00a1ee525b3458425b17adbe3df61/components/XGBoost/Predict/component.yaml'}
        arguments:
          data:
            taskOutput: {outputName: test_1, taskId: Split table into folds, type: CSV}
          model:
            taskOutput: {outputName: model, taskId: Xgboost train 2, type: XGBoostModel}
          label_column:
            graphInput: {inputName: label_column}
      Pandas Transform DataFrame in CSV format 3:
        componentRef: {digest: 58dc88349157bf128021708c316ce4eb60bc1de0a5a7dd3af45fabac3276d510,
          url: 'https://raw.githubusercontent.com/kubeflow/pipelines/6162d55998b176b50267d351241100bb0ee715bc/components/pandas/Transform_DataFrame/in_CSV_format/component.yaml'}
        arguments:
          table:
            taskOutput: {outputName: test_2, taskId: Split table into folds, type: CSV}
          transform_code: df = df[["tips"]]
      Remove header 3:
        componentRef: {digest: ba35ffea863855b956c3c50aefa0420ba3823949a6c059e6e3971cde960dc5a3,
          url: 'https://raw.githubusercontent.com/kubeflow/pipelines/02c9638287468c849632cf9f7885b51de4c66f86/components/tables/Remove_header/component.yaml'}
        arguments:
          table:
            taskOutput: {outputName: transformed_table, taskId: Pandas Transform DataFrame in CSV format 3, type: CSV}
      Xgboost train 3:
        componentRef: {digest: 09b80053da29f8f51575b42e5d2e8ad4b7bdcc92a02c3744e189b1f597006b38,
          url: 'https://raw.githubusercontent.com/kubeflow/pipelines/567c04c51ff00a1ee525b3458425b17adbe3df61/components/XGBoost/Train/component.yaml'}
        arguments:
          training_data:
            taskOutput: {outputName: train_4, taskId: Split table into folds, type: CSV}
          label_column:
            graphInput: {inputName: label_column}
          num_iterations:
            graphInput: {inputName: num_iterations}
          objective:
            graphInput: {inputName: objective}
      Pandas Transform DataFrame in CSV format 4:
        componentRef: {digest: 58dc88349157bf128021708c316ce4eb60bc1de0a5a7dd3af45fabac3276d510,
          url: 'https://raw.githubusercontent.com/kubeflow/pipelines/6162d55998b176b50267d351241100bb0ee715bc/components/pandas/Transform_DataFrame/in_CSV_format/component.yaml'}
        arguments:
          table:
            taskOutput: {outputName: test_4, taskId: Split table into folds, type: CSV}
          transform_code: df = df[["tips"]]
      Remove header 4:
        componentRef: {digest: ba35ffea863855b956c3c50aefa0420ba3823949a6c059e6e3971cde960dc5a3,
          url: 'https://raw.githubusercontent.com/kubeflow/pipelines/02c9638287468c849632cf9f7885b51de4c66f86/components/tables/Remove_header/component.yaml'}
        arguments:
          table:
            taskOutput: {outputName: transformed_table, taskId: Pandas Transform DataFrame in CSV format 4, type: CSV}
      Xgboost predict 3:
        componentRef: {digest: ecdfaf32cff15b6abc3d0dd80365ce00577f1a19a058fbe201f515431cea1357,
          url: 'https://raw.githubusercontent.com/kubeflow/pipelines/567c04c51ff00a1ee525b3458425b17adbe3df61/components/XGBoost/Predict/component.yaml'}
        arguments:
          data:
            taskOutput: {outputName: test_4, taskId: Split table into folds, type: CSV}
          model:
            taskOutput: {outputName: model, taskId: Xgboost train 3, type: XGBoostModel}
          label_column:
            graphInput: {inputName: label_column}
      Calculate regression metrics from csv 2:
        componentRef: {digest: e3ecbfeb18032820edfee4255e2fb6d15d15ed224e166519d5e528e12053a995,
          url: 'https://raw.githubusercontent.com/kubeflow/pipelines/7da1ac9464b4b3e7d95919faa2f1107a9635b7e4/components/ml_metrics/Calculate_regression_metrics/from_CSV/component.yaml'}
        arguments:
          true_values:
            taskOutput: {outputName: table, taskId: Remove header 4}
          predicted_values:
            taskOutput: {outputName: predictions, taskId: Xgboost predict 3, type: Text}
      Pandas Transform DataFrame in CSV format 5:
        componentRef: {digest: 58dc88349157bf128021708c316ce4eb60bc1de0a5a7dd3af45fabac3276d510,
          url: 'https://raw.githubusercontent.com/kubeflow/pipelines/6162d55998b176b50267d351241100bb0ee715bc/components/pandas/Transform_DataFrame/in_CSV_format/component.yaml'}
        arguments:
          table:
            taskOutput: {outputName: test_1, taskId: Split table into folds, type: CSV}
          transform_code: df = df[["tips"]]
      Remove header 5:
        componentRef: {digest: ba35ffea863855b956c3c50aefa0420ba3823949a6c059e6e3971cde960dc5a3,
          url: 'https://raw.githubusercontent.com/kubeflow/pipelines/02c9638287468c849632cf9f7885b51de4c66f86/components/tables/Remove_header/component.yaml'}
        arguments:
          table:
            taskOutput: {outputName: transformed_table, taskId: Pandas Transform DataFrame in CSV format 5, type: CSV}
      Calculate regression metrics from csv 3:
        componentRef: {digest: e3ecbfeb18032820edfee4255e2fb6d15d15ed224e166519d5e528e12053a995,
          url: 'https://raw.githubusercontent.com/kubeflow/pipelines/7da1ac9464b4b3e7d95919faa2f1107a9635b7e4/components/ml_metrics/Calculate_regression_metrics/from_CSV/component.yaml'}
arguments:
|
||||
true_values:
|
||||
taskOutput: {outputName: table, taskId: Remove header 5}
|
||||
predicted_values:
|
||||
taskOutput: {outputName: predictions, taskId: Xgboost predict 2, type: Text}
|
||||
Xgboost train 4:
|
||||
componentRef: {digest: 09b80053da29f8f51575b42e5d2e8ad4b7bdcc92a02c3744e189b1f597006b38,
|
||||
url: 'https://raw.githubusercontent.com/kubeflow/pipelines/567c04c51ff00a1ee525b3458425b17adbe3df61/components/XGBoost/Train/component.yaml'}
|
||||
arguments:
|
||||
training_data:
|
||||
taskOutput: {outputName: train_2, taskId: Split table into folds, type: CSV}
|
||||
label_column:
|
||||
graphInput: {inputName: label_column}
|
||||
num_iterations:
|
||||
graphInput: {inputName: num_iterations}
|
||||
objective:
|
||||
graphInput: {inputName: objective}
|
||||
Xgboost predict 4:
|
||||
componentRef: {digest: ecdfaf32cff15b6abc3d0dd80365ce00577f1a19a058fbe201f515431cea1357,
|
||||
url: 'https://raw.githubusercontent.com/kubeflow/pipelines/567c04c51ff00a1ee525b3458425b17adbe3df61/components/XGBoost/Predict/component.yaml'}
|
||||
arguments:
|
||||
data:
|
||||
taskOutput: {outputName: test_2, taskId: Split table into folds, type: CSV}
|
||||
model:
|
||||
taskOutput: {outputName: model, taskId: Xgboost train 4, type: XGBoostModel}
|
||||
label_column:
|
||||
graphInput: {inputName: label_column}
|
||||
Calculate regression metrics from csv 4:
|
||||
componentRef: {digest: e3ecbfeb18032820edfee4255e2fb6d15d15ed224e166519d5e528e12053a995,
|
||||
url: 'https://raw.githubusercontent.com/kubeflow/pipelines/7da1ac9464b4b3e7d95919faa2f1107a9635b7e4/components/ml_metrics/Calculate_regression_metrics/from_CSV/component.yaml'}
|
||||
arguments:
|
||||
true_values:
|
||||
taskOutput: {outputName: table, taskId: Remove header 3}
|
||||
predicted_values:
|
||||
taskOutput: {outputName: predictions, taskId: Xgboost predict 4, type: Text}
|
||||
Xgboost train 5:
|
||||
componentRef: {digest: 09b80053da29f8f51575b42e5d2e8ad4b7bdcc92a02c3744e189b1f597006b38,
|
||||
url: 'https://raw.githubusercontent.com/kubeflow/pipelines/567c04c51ff00a1ee525b3458425b17adbe3df61/components/XGBoost/Train/component.yaml'}
|
||||
arguments:
|
||||
training_data:
|
||||
taskOutput: {outputName: train_5, taskId: Split table into folds, type: CSV}
|
||||
label_column:
|
||||
graphInput: {inputName: label_column}
|
||||
num_iterations:
|
||||
graphInput: {inputName: num_iterations}
|
||||
objective:
|
||||
graphInput: {inputName: objective}
|
||||
Xgboost predict 5:
|
||||
componentRef: {digest: ecdfaf32cff15b6abc3d0dd80365ce00577f1a19a058fbe201f515431cea1357,
|
||||
url: 'https://raw.githubusercontent.com/kubeflow/pipelines/567c04c51ff00a1ee525b3458425b17adbe3df61/components/XGBoost/Predict/component.yaml'}
|
||||
arguments:
|
||||
data:
|
||||
taskOutput: {outputName: test_5, taskId: Split table into folds, type: CSV}
|
||||
model:
|
||||
taskOutput: {outputName: model, taskId: Xgboost train 5, type: XGBoostModel}
|
||||
label_column:
|
||||
graphInput: {inputName: label_column}
|
||||
Xgboost train 6:
|
||||
componentRef: {digest: 09b80053da29f8f51575b42e5d2e8ad4b7bdcc92a02c3744e189b1f597006b38,
|
||||
url: 'https://raw.githubusercontent.com/kubeflow/pipelines/567c04c51ff00a1ee525b3458425b17adbe3df61/components/XGBoost/Train/component.yaml'}
|
||||
arguments:
|
||||
training_data:
|
||||
taskOutput: {outputName: train_3, taskId: Split table into folds, type: CSV}
|
||||
label_column:
|
||||
graphInput: {inputName: label_column}
|
||||
num_iterations:
|
||||
graphInput: {inputName: num_iterations}
|
||||
objective:
|
||||
graphInput: {inputName: objective}
|
||||
Xgboost predict 6:
|
||||
componentRef: {digest: ecdfaf32cff15b6abc3d0dd80365ce00577f1a19a058fbe201f515431cea1357,
|
||||
url: 'https://raw.githubusercontent.com/kubeflow/pipelines/567c04c51ff00a1ee525b3458425b17adbe3df61/components/XGBoost/Predict/component.yaml'}
|
||||
arguments:
|
||||
data:
|
||||
taskOutput: {outputName: test_3, taskId: Split table into folds, type: CSV}
|
||||
model:
|
||||
taskOutput: {outputName: model, taskId: Xgboost train 6, type: XGBoostModel}
|
||||
label_column:
|
||||
graphInput: {inputName: label_column}
|
||||
Calculate regression metrics from csv 5:
|
||||
componentRef: {digest: e3ecbfeb18032820edfee4255e2fb6d15d15ed224e166519d5e528e12053a995,
|
||||
url: 'https://raw.githubusercontent.com/kubeflow/pipelines/7da1ac9464b4b3e7d95919faa2f1107a9635b7e4/components/ml_metrics/Calculate_regression_metrics/from_CSV/component.yaml'}
|
||||
arguments:
|
||||
true_values:
|
||||
taskOutput: {outputName: table, taskId: Remove header 2}
|
||||
predicted_values:
|
||||
taskOutput: {outputName: predictions, taskId: Xgboost predict 6, type: Text}
|
||||
Pandas Transform DataFrame in CSV format 6:
|
||||
componentRef: {digest: 58dc88349157bf128021708c316ce4eb60bc1de0a5a7dd3af45fabac3276d510,
|
||||
url: 'https://raw.githubusercontent.com/kubeflow/pipelines/6162d55998b176b50267d351241100bb0ee715bc/components/pandas/Transform_DataFrame/in_CSV_format/component.yaml'}
|
||||
arguments:
|
||||
table:
|
||||
taskOutput: {outputName: test_5, taskId: Split table into folds, type: CSV}
|
||||
transform_code: df = df[["tips"]]
|
||||
Remove header 6:
|
||||
componentRef: {digest: ba35ffea863855b956c3c50aefa0420ba3823949a6c059e6e3971cde960dc5a3,
|
||||
url: 'https://raw.githubusercontent.com/kubeflow/pipelines/02c9638287468c849632cf9f7885b51de4c66f86/components/tables/Remove_header/component.yaml'}
|
||||
arguments:
|
||||
table:
|
||||
taskOutput: {outputName: transformed_table, taskId: Pandas Transform DataFrame
|
||||
in CSV format 6, type: CSV}
|
||||
Calculate regression metrics from csv 6:
|
||||
componentRef: {digest: e3ecbfeb18032820edfee4255e2fb6d15d15ed224e166519d5e528e12053a995,
|
||||
url: 'https://raw.githubusercontent.com/kubeflow/pipelines/7da1ac9464b4b3e7d95919faa2f1107a9635b7e4/components/ml_metrics/Calculate_regression_metrics/from_CSV/component.yaml'}
|
||||
arguments:
|
||||
true_values:
|
||||
taskOutput: {outputName: table, taskId: Remove header 6}
|
||||
predicted_values:
|
||||
taskOutput: {outputName: predictions, taskId: Xgboost predict 5, type: Text}
|
||||
Aggregate regression metrics from csv:
|
||||
componentRef: {digest: 3e128130521eff8d43764f3dcb037316cdd6490ad2878df5adef416f7c2f3c19,
|
||||
url: 'https://raw.githubusercontent.com/kubeflow/pipelines/7ea9363fe201918d419fecdc00d1275e657ff712/components/ml_metrics/Aggregate_regression_metrics/component.yaml'}
|
||||
arguments:
|
||||
metrics_1:
|
||||
taskOutput: {outputName: metrics, taskId: Calculate regression metrics
|
||||
from csv 3, type: JsonObject}
|
||||
metrics_2:
|
||||
taskOutput: {outputName: metrics, taskId: Calculate regression metrics
|
||||
from csv 4, type: JsonObject}
|
||||
metrics_3:
|
||||
taskOutput: {outputName: metrics, taskId: Calculate regression metrics
|
||||
from csv 5, type: JsonObject}
|
||||
metrics_4:
|
||||
taskOutput: {outputName: metrics, taskId: Calculate regression metrics
|
||||
from csv 2, type: JsonObject}
|
||||
metrics_5:
|
||||
taskOutput: {outputName: metrics, taskId: Calculate regression metrics
|
||||
from csv 6, type: JsonObject}
|
||||
outputValues:
|
||||
model:
|
||||
taskOutput: {outputName: model, taskId: Xgboost train, type: XGBoostModel}
|
||||
training_mean_absolute_error:
|
||||
taskOutput: {outputName: mean_absolute_error, taskId: Calculate regression
|
||||
metrics from csv, type: Float}
|
||||
training_mean_squared_error:
|
||||
taskOutput: {outputName: mean_squared_error, taskId: Calculate regression
|
||||
metrics from csv, type: Float}
|
||||
training_root_mean_squared_error:
|
||||
taskOutput: {outputName: root_mean_squared_error, taskId: Calculate regression
|
||||
metrics from csv, type: Float}
|
||||
training_metrics:
|
||||
taskOutput: {outputName: metrics, taskId: Calculate regression metrics from
|
||||
csv, type: JsonObject}
|
||||
cv_mean_absolute_error:
|
||||
taskOutput: {outputName: mean_absolute_error, taskId: Aggregate regression
|
||||
metrics from csv, type: Float}
|
||||
cv_mean_squared_error:
|
||||
taskOutput: {outputName: mean_squared_error, taskId: Aggregate regression
|
||||
metrics from csv, type: Float}
|
||||
cv_root_mean_squared_error:
|
||||
taskOutput: {outputName: root_mean_squared_error, taskId: Aggregate regression
|
||||
metrics from csv, type: Float}
|
||||
cv_metrics:
|
||||
taskOutput: {outputName: metrics, taskId: Aggregate regression metrics from
|
||||
csv, type: JsonObject}
|
||||
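The unrolled graph above encodes a per-fold train/predict pattern over five folds. A minimal sketch of that pattern, assuming the same component URLs and the splitter's train_1…train_5/test_1…test_5 output names used above (the cross_validate function name is illustrative only):

from kfp import components

split_table_into_folds_op = components.load_component_from_url('https://raw.githubusercontent.com/kubeflow/pipelines/e9b4b29b22a5120daf95b581b0392cd461a906f0/components/dataset_manipulation/split_data_into_folds/in_CSV/component.yaml')
xgboost_train_op = components.load_component_from_url('https://raw.githubusercontent.com/kubeflow/pipelines/567c04c51ff00a1ee525b3458425b17adbe3df61/components/XGBoost/Train/component.yaml')
xgboost_predict_op = components.load_component_from_url('https://raw.githubusercontent.com/kubeflow/pipelines/567c04c51ff00a1ee525b3458425b17adbe3df61/components/XGBoost/Predict/component.yaml')


def cross_validate(data, label_column, objective, num_iterations):
    # One split task; it exposes train_i/test_i outputs for each of the 5 folds.
    folds = split_table_into_folds_op(table=data)
    for i in range(1, 6):
        # Train on the fold's training split...
        model = xgboost_train_op(
            training_data=folds.outputs['train_{}'.format(i)],
            label_column=label_column,
            objective=objective,
            num_iterations=num_iterations,
        ).outputs['model']
        # ...and score the fold's held-out split with the fold's own model.
        xgboost_predict_op(
            data=folds.outputs['test_{}'.format(i)],
            model=model,
            label_column=label_column,
        )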
@@ -1,60 +0,0 @@
from collections import OrderedDict
from kfp import components


xgboost_train_on_csv_op = components.load_component_from_url('https://raw.githubusercontent.com/kubeflow/pipelines/567c04c51ff00a1ee525b3458425b17adbe3df61/components/XGBoost/Train/component.yaml')
xgboost_predict_on_csv_op = components.load_component_from_url('https://raw.githubusercontent.com/kubeflow/pipelines/567c04c51ff00a1ee525b3458425b17adbe3df61/components/XGBoost/Predict/component.yaml')
pandas_transform_csv_op = components.load_component_from_url('https://raw.githubusercontent.com/kubeflow/pipelines/6162d55998b176b50267d351241100bb0ee715bc/components/pandas/Transform_DataFrame/in_CSV_format/component.yaml')
drop_header_op = components.load_component_from_url('https://raw.githubusercontent.com/kubeflow/pipelines/02c9638287468c849632cf9f7885b51de4c66f86/components/tables/Remove_header/component.yaml')
calculate_regression_metrics_from_csv_op = components.load_component_from_url('https://raw.githubusercontent.com/kubeflow/pipelines/7da1ac9464b4b3e7d95919faa2f1107a9635b7e4/components/ml_metrics/Calculate_regression_metrics/from_CSV/component.yaml')


def xgboost_train_regression_and_calculate_metrics_on_csv(
    training_data: 'CSV',
    testing_data: 'CSV',
    label_column: int = 0,
    objective: str = 'reg:squarederror',
    num_iterations: int = 200,
):
    model = xgboost_train_on_csv_op(
        training_data=training_data,
        label_column=label_column,
        objective=objective,
        num_iterations=num_iterations,
    ).outputs['model']

    predictions = xgboost_predict_on_csv_op(
        data=testing_data,
        model=model,
        label_column=label_column,
    ).output

    true_values_table = pandas_transform_csv_op(
        table=testing_data,
        transform_code='df = df[["tips"]]',
    ).output

    true_values = drop_header_op(true_values_table).output

    metrics_task = calculate_regression_metrics_from_csv_op(
        true_values=true_values,
        predicted_values=predictions,
    )
    return OrderedDict([
        ('model', model),
        ('mean_absolute_error', metrics_task.outputs['mean_absolute_error']),
        ('mean_squared_error', metrics_task.outputs['mean_squared_error']),
        ('root_mean_squared_error', metrics_task.outputs['root_mean_squared_error']),
        ('metrics', metrics_task.outputs['metrics']),
    ])


if __name__ == '__main__':
    xgboost_train_regression_and_calculate_metrics_on_csv_op = components.create_graph_component_from_pipeline_func(
        xgboost_train_regression_and_calculate_metrics_on_csv,
        output_component_file='component.yaml',
        annotations={
            "author": "Alexey Volkov <alexey.volkov@ark-kun.com>",
            "canonical_location": "https://raw.githubusercontent.com/Ark-kun/pipeline_components/master/components/XGBoost/Train_regression_and_calculate_metrics/from_CSV/component.yaml",
        },
    )
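Once the block above has written component.yaml, the generated graph component can be loaded and used like any other component. A minimal sketch, assuming the file exists locally and that upstream tasks supply the CSV artifacts (my_pipeline is illustrative only):

from kfp import components

# Hypothetical consumer of the component.yaml emitted by the block above.
train_and_eval_op = components.load_component_from_file('component.yaml')


def my_pipeline(training_data, testing_data):
    eval_task = train_and_eval_op(
        training_data=training_data,
        testing_data=testing_data,
        label_column=0,
    )
    # The OrderedDict keys returned above become the task's named outputs.
    mean_squared_error = eval_task.outputs['mean_squared_error']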
@@ -1,79 +0,0 @@
name: Xgboost train regression and calculate metrics on csv
inputs:
- {name: training_data, type: CSV}
- {name: testing_data, type: CSV}
- {name: label_column, type: Integer, default: '0', optional: true}
- {name: objective, type: String, default: 'reg:squarederror', optional: true}
- {name: num_iterations, type: Integer, default: '200', optional: true}
outputs:
- {name: model, type: XGBoostModel}
- {name: mean_absolute_error, type: Float}
- {name: mean_squared_error, type: Float}
- {name: root_mean_squared_error, type: Float}
- {name: metrics, type: JsonObject}
metadata:
  annotations:
    author: Alexey Volkov <alexey.volkov@ark-kun.com>
    canonical_location: 'https://raw.githubusercontent.com/Ark-kun/pipeline_components/master/components/XGBoost/Train_regression_and_calculate_metrics/from_CSV/component.yaml'
implementation:
  graph:
    tasks:
      Xgboost train:
        componentRef: {digest: 09b80053da29f8f51575b42e5d2e8ad4b7bdcc92a02c3744e189b1f597006b38, url: 'https://raw.githubusercontent.com/kubeflow/pipelines/567c04c51ff00a1ee525b3458425b17adbe3df61/components/XGBoost/Train/component.yaml'}
        arguments:
          training_data:
            graphInput: {inputName: training_data}
          label_column:
            graphInput: {inputName: label_column}
          num_iterations:
            graphInput: {inputName: num_iterations}
          objective:
            graphInput: {inputName: objective}
      Xgboost predict:
        componentRef: {digest: ecdfaf32cff15b6abc3d0dd80365ce00577f1a19a058fbe201f515431cea1357, url: 'https://raw.githubusercontent.com/kubeflow/pipelines/567c04c51ff00a1ee525b3458425b17adbe3df61/components/XGBoost/Predict/component.yaml'}
        arguments:
          data:
            graphInput: {inputName: testing_data}
          model:
            taskOutput: {outputName: model, taskId: Xgboost train, type: XGBoostModel}
          label_column:
            graphInput: {inputName: label_column}
      Pandas Transform DataFrame in CSV format:
        componentRef: {digest: 58dc88349157bf128021708c316ce4eb60bc1de0a5a7dd3af45fabac3276d510, url: 'https://raw.githubusercontent.com/kubeflow/pipelines/6162d55998b176b50267d351241100bb0ee715bc/components/pandas/Transform_DataFrame/in_CSV_format/component.yaml'}
        arguments:
          table:
            graphInput: {inputName: testing_data}
          transform_code: df = df[["tips"]]
      Remove header:
        componentRef: {digest: ba35ffea863855b956c3c50aefa0420ba3823949a6c059e6e3971cde960dc5a3, url: 'https://raw.githubusercontent.com/kubeflow/pipelines/02c9638287468c849632cf9f7885b51de4c66f86/components/tables/Remove_header/component.yaml'}
        arguments:
          table:
            taskOutput: {outputName: transformed_table, taskId: Pandas Transform DataFrame in CSV format, type: CSV}
      Calculate regression metrics from csv:
        componentRef: {digest: e3ecbfeb18032820edfee4255e2fb6d15d15ed224e166519d5e528e12053a995, url: 'https://raw.githubusercontent.com/kubeflow/pipelines/7da1ac9464b4b3e7d95919faa2f1107a9635b7e4/components/ml_metrics/Calculate_regression_metrics/from_CSV/component.yaml'}
        arguments:
          true_values:
            taskOutput: {outputName: table, taskId: Remove header}
          predicted_values:
            taskOutput: {outputName: predictions, taskId: Xgboost predict, type: Text}
    outputValues:
      model:
        taskOutput: {outputName: model, taskId: Xgboost train, type: XGBoostModel}
      mean_absolute_error:
        taskOutput: {outputName: mean_absolute_error, taskId: Calculate regression metrics from csv, type: Float}
      mean_squared_error:
        taskOutput: {outputName: mean_squared_error, taskId: Calculate regression metrics from csv, type: Float}
      root_mean_squared_error:
        taskOutput: {outputName: root_mean_squared_error, taskId: Calculate regression metrics from csv, type: Float}
      metrics:
        taskOutput: {outputName: metrics, taskId: Calculate regression metrics from csv, type: JsonObject}

@@ -1,91 +0,0 @@
#!/usr/bin/env python3

# This sample demonstrates continuous training using a train-eval-check recursive loop.
# The main pipeline trains the initial model and then gradually trains the model
# some more until the model evaluation metrics are good enough.

import kfp
from kfp import components


chicago_taxi_dataset_op = components.load_component_from_url('https://raw.githubusercontent.com/kubeflow/pipelines/e3337b8bdcd63636934954e592d4b32c95b49129/components/datasets/Chicago%20Taxi/component.yaml')
xgboost_train_on_csv_op = components.load_component_from_url('https://raw.githubusercontent.com/kubeflow/pipelines/567c04c51ff00a1ee525b3458425b17adbe3df61/components/XGBoost/Train/component.yaml')
xgboost_predict_on_csv_op = components.load_component_from_url('https://raw.githubusercontent.com/kubeflow/pipelines/567c04c51ff00a1ee525b3458425b17adbe3df61/components/XGBoost/Predict/component.yaml')

pandas_transform_csv_op = components.load_component_from_url('https://raw.githubusercontent.com/kubeflow/pipelines/6162d55998b176b50267d351241100bb0ee715bc/components/pandas/Transform_DataFrame/in_CSV_format/component.yaml')
drop_header_op = components.load_component_from_url('https://raw.githubusercontent.com/kubeflow/pipelines/02c9638287468c849632cf9f7885b51de4c66f86/components/tables/Remove_header/component.yaml')
calculate_regression_metrics_from_csv_op = components.load_component_from_url('https://raw.githubusercontent.com/kubeflow/pipelines/616542ac0f789914f4eb53438da713dd3004fba4/components/ml_metrics/Calculate_regression_metrics/from_CSV/component.yaml')


# This recursive sub-pipeline trains a model, evaluates it, calculates the metrics and checks them.
# If the model error is too high, then more training is performed until the model is good.
@kfp.dsl.graph_component
def train_until_low_error(starting_model, training_data, true_values):
    # Training
    model = xgboost_train_on_csv_op(
        training_data=training_data,
        starting_model=starting_model,
        label_column=0,
        objective='reg:squarederror',
        num_iterations=50,
    ).outputs['model']

    # Predicting
    predictions = xgboost_predict_on_csv_op(
        data=training_data,
        model=model,
        label_column=0,
    ).output

    # Calculating the regression metrics
    metrics_task = calculate_regression_metrics_from_csv_op(
        true_values=true_values,
        predicted_values=predictions,
    )

    # Checking the metrics
    with kfp.dsl.Condition(metrics_task.outputs['mean_squared_error'] > 0.01):
        # Training some more
        train_until_low_error(
            starting_model=model,
            training_data=training_data,
            true_values=true_values,
        )


# The main pipeline trains the initial model and then gradually trains the model some more until the model evaluation metrics are good enough.
def train_until_good_pipeline():
    # Preparing the training data
    training_data = chicago_taxi_dataset_op(
        where='trip_start_timestamp >= "2019-01-01" AND trip_start_timestamp < "2019-02-01"',
        select='tips,trip_seconds,trip_miles,pickup_community_area,dropoff_community_area,fare,tolls,extras,trip_total',
        limit=10000,
    ).output

    # Preparing the true values
    true_values_table = pandas_transform_csv_op(
        table=training_data,
        transform_code='df = df[["tips"]]',
    ).output

    true_values = drop_header_op(true_values_table).output

    # Initial model training
    first_model = xgboost_train_on_csv_op(
        training_data=training_data,
        label_column=0,
        objective='reg:squarederror',
        num_iterations=100,
    ).outputs['model']

    # Recursively training until the error becomes low
    train_until_low_error(
        starting_model=first_model,
        training_data=training_data,
        true_values=true_values,
    )


if __name__ == '__main__':
    kfp_endpoint = None
    kfp.Client(host=kfp_endpoint).create_run_from_pipeline_func(train_until_good_pipeline, arguments={})
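For offline inspection, the same pipeline can be compiled to a workflow package instead of being submitted. A minimal sketch using the v1 SDK compiler, with the definitions above in scope (the output filename is arbitrary):

import kfp

# Compile to an inspectable package instead of submitting to a cluster.
kfp.compiler.Compiler().compile(train_until_good_pipeline, 'train_until_good_pipeline.yaml')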
@@ -1,68 +0,0 @@
import kfp
from kfp import components


chicago_taxi_dataset_op = components.load_component_from_url('https://raw.githubusercontent.com/kubeflow/pipelines/e3337b8bdcd63636934954e592d4b32c95b49129/components/datasets/Chicago%20Taxi/component.yaml')
convert_csv_to_apache_parquet_op = components.load_component_from_url('https://raw.githubusercontent.com/kubeflow/pipelines/0d7d6f41c92bdc05c2825232afe2b47e5cb6c4b3/components/_converters/ApacheParquet/from_CSV/component.yaml')
xgboost_train_on_csv_op = components.load_component_from_url('https://raw.githubusercontent.com/kubeflow/pipelines/567c04c51ff00a1ee525b3458425b17adbe3df61/components/XGBoost/Train/component.yaml')
xgboost_predict_on_csv_op = components.load_component_from_url('https://raw.githubusercontent.com/kubeflow/pipelines/567c04c51ff00a1ee525b3458425b17adbe3df61/components/XGBoost/Predict/component.yaml')
xgboost_train_on_parquet_op = components.load_component_from_url('https://raw.githubusercontent.com/kubeflow/pipelines/0ae2f30ff24beeef1c64cc7c434f1f652c065192/components/XGBoost/Train/from_ApacheParquet/component.yaml')
xgboost_predict_on_parquet_op = components.load_component_from_url('https://raw.githubusercontent.com/kubeflow/pipelines/0ae2f30ff24beeef1c64cc7c434f1f652c065192/components/XGBoost/Predict/from_ApacheParquet/component.yaml')


def xgboost_pipeline():
    training_data_csv = chicago_taxi_dataset_op(
        where='trip_start_timestamp >= "2019-01-01" AND trip_start_timestamp < "2019-02-01"',
        select='tips,trip_seconds,trip_miles,pickup_community_area,dropoff_community_area,fare,tolls,extras,trip_total',
        limit=10000,
    ).output

    # Training and prediction on dataset in CSV format
    model_trained_on_csv = xgboost_train_on_csv_op(
        training_data=training_data_csv,
        label_column=0,
        objective='reg:squarederror',
        num_iterations=200,
    ).outputs['model']

    xgboost_predict_on_csv_op(
        data=training_data_csv,
        model=model_trained_on_csv,
        label_column=0,
    )

    # Training and prediction on dataset in Apache Parquet format
    training_data_parquet = convert_csv_to_apache_parquet_op(
        training_data_csv
    ).output

    model_trained_on_parquet = xgboost_train_on_parquet_op(
        training_data=training_data_parquet,
        label_column_name='tips',
        objective='reg:squarederror',
        num_iterations=200,
    ).outputs['model']

    xgboost_predict_on_parquet_op(
        data=training_data_parquet,
        model=model_trained_on_parquet,
        label_column_name='tips',
    )

    # Checking cross-format predictions
    xgboost_predict_on_parquet_op(
        data=training_data_parquet,
        model=model_trained_on_csv,
        label_column_name='tips',
    )

    xgboost_predict_on_csv_op(
        data=training_data_csv,
        model=model_trained_on_parquet,
        label_column=0,
    )


if __name__ == '__main__':
    kfp_endpoint = None
    kfp.Client(host=kfp_endpoint).create_run_from_pipeline_func(xgboost_pipeline, arguments={})

@@ -1,34 +0,0 @@
# cross_validation_pipeline compact
import kfp
from kfp import components


chicago_taxi_dataset_op = components.load_component_from_url('https://raw.githubusercontent.com/kubeflow/pipelines/e3337b8bdcd63636934954e592d4b32c95b49129/components/datasets/Chicago%20Taxi/component.yaml')
xgboost_train_and_cv_regression_on_csv_op = components.load_component_from_url('https://raw.githubusercontent.com/kubeflow/pipelines/1a11ce2aea5243cdcc2b4721675303f78f49ca21/components/XGBoost/Train_and_cross-validate_regression/from_CSV/component.yaml')


def cross_validation_pipeline(
    label_column: int = 0,
    objective: str = 'reg:squarederror',
    num_iterations: int = 200,
):
    data = chicago_taxi_dataset_op(
        where='trip_start_timestamp >= "{}" AND trip_start_timestamp < "{}"'.format('2019-01-01', '2019-02-01'),
        select='tips,trip_seconds,trip_miles,pickup_community_area,dropoff_community_area,fare,tolls,extras,trip_total',
        limit=10000,
    ).output

    xgboost_train_and_cv_regression_on_csv_op(
        data=data,
        label_column=label_column,
        objective=objective,
        num_iterations=num_iterations,
    )


if __name__ == '__main__':
    kfp_endpoint = None
    kfp.Client(host=kfp_endpoint).create_run_from_pipeline_func(
        cross_validation_pipeline,
        arguments={},
    )

@@ -1,41 +0,0 @@
import kfp
from kfp import components

component_store = components.ComponentStore(url_search_prefixes=['https://raw.githubusercontent.com/kubeflow/pipelines/af3eaf64e87313795cad1add9bfd9fa1e86af6de/components/'])

chicago_taxi_dataset_op = component_store.load_component(name='datasets/Chicago_Taxi_Trips')
convert_csv_to_apache_parquet_op = component_store.load_component(name='_converters/ApacheParquet/from_CSV')
convert_tsv_to_apache_parquet_op = component_store.load_component(name='_converters/ApacheParquet/from_TSV')
convert_apache_parquet_to_csv_op = component_store.load_component(name='_converters/ApacheParquet/to_CSV')
convert_apache_parquet_to_tsv_op = component_store.load_component(name='_converters/ApacheParquet/to_TSV')
convert_apache_parquet_to_apache_arrow_feather_op = component_store.load_component(name='_converters/ApacheParquet/to_ApacheArrowFeather')
convert_apache_arrow_feather_to_apache_parquet_op = component_store.load_component(name='_converters/ApacheParquet/from_ApacheArrowFeather')


def parquet_pipeline():
    csv = chicago_taxi_dataset_op(
        where='trip_start_timestamp >= "2019-01-01" AND trip_start_timestamp < "2019-02-01"',
        select='tips,trip_seconds,trip_miles,pickup_community_area,dropoff_community_area,fare,tolls,extras,trip_total',
        limit=10000,
    ).output

    tsv = chicago_taxi_dataset_op(
        where='trip_start_timestamp >= "2019-01-01" AND trip_start_timestamp < "2019-02-01"',
        select='tips,trip_seconds,trip_miles,pickup_community_area,dropoff_community_area,fare,tolls,extras,trip_total',
        limit=10000,
        format='tsv',
    ).output

    csv_parquet = convert_csv_to_apache_parquet_op(csv).output
    csv_parquet_csv = convert_apache_parquet_to_csv_op(csv_parquet).output
    csv_parquet_feather = convert_apache_parquet_to_apache_arrow_feather_op(csv_parquet).output
    csv_parquet_feather_parquet = convert_apache_arrow_feather_to_apache_parquet_op(csv_parquet_feather).output

    tsv_parquet = convert_tsv_to_apache_parquet_op(tsv).output
    tsv_parquet_tsv = convert_apache_parquet_to_tsv_op(tsv_parquet).output
    tsv_parquet_feather = convert_apache_parquet_to_apache_arrow_feather_op(tsv_parquet).output
    tsv_parquet_feather_parquet = convert_apache_arrow_feather_to_apache_parquet_op(tsv_parquet_feather).output


if __name__ == '__main__':
    kfp_endpoint = None
    kfp.Client(host=kfp_endpoint).create_run_from_pipeline_func(parquet_pipeline, arguments={})
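Each converter component in this chain wraps one or two pyarrow calls (their sources appear further below). A minimal local sketch of the same CSV -> Parquet -> Feather -> Parquet round trip, assuming pyarrow and pandas are installed and that data.csv is a hypothetical existing file:

from pyarrow import csv, feather, parquet

table = csv.read_csv('data.csv')            # hypothetical input file
parquet.write_table(table, 'data.parquet')  # CSV -> Parquet
data_frame = parquet.read_pandas('data.parquet').to_pandas()
feather.write_feather(data_frame, 'data.feather')  # Parquet -> Feather
parquet.write_table(feather.read_table('data.feather'), 'roundtrip.parquet')  # Feather -> Parquet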
@@ -1,31 +0,0 @@
from kfp.components import InputPath, OutputPath, create_component_from_func


def convert_apache_arrow_feather_to_apache_parquet(
    data_path: InputPath('ApacheArrowFeather'),
    output_data_path: OutputPath('ApacheParquet'),
):
    '''Converts Apache Arrow Feather to Apache Parquet.

    [Apache Arrow Feather](https://arrow.apache.org/docs/python/feather.html)
    [Apache Parquet](https://parquet.apache.org/)

    Annotations:
        author: Alexey Volkov <alexey.volkov@ark-kun.com>
    '''
    from pyarrow import feather, parquet

    table = feather.read_table(data_path)
    parquet.write_table(table, output_data_path)


if __name__ == '__main__':
    create_component_from_func(
        convert_apache_arrow_feather_to_apache_parquet,
        output_component_file='component.yaml',
        base_image='python:3.7',
        packages_to_install=['pyarrow==0.17.1'],
        annotations={
            "author": "Alexey Volkov <alexey.volkov@ark-kun.com>",
            "canonical_location": "https://raw.githubusercontent.com/Ark-kun/pipeline_components/master/components/_converters/ApacheParquet/from_ApacheArrowFeather/component.yaml",
        },
    )

@@ -1,78 +0,0 @@
name: Convert apache arrow feather to apache parquet
description: |-
  Converts Apache Arrow Feather to Apache Parquet.

  [Apache Arrow Feather](https://arrow.apache.org/docs/python/feather.html)
  [Apache Parquet](https://parquet.apache.org/)

  Annotations:
      author: Alexey Volkov <alexey.volkov@ark-kun.com>
inputs:
- {name: data, type: ApacheArrowFeather}
outputs:
- {name: output_data, type: ApacheParquet}
metadata:
  annotations:
    author: Alexey Volkov <alexey.volkov@ark-kun.com>
    canonical_location: 'https://raw.githubusercontent.com/Ark-kun/pipeline_components/master/components/_converters/ApacheParquet/from_ApacheArrowFeather/component.yaml'
implementation:
  container:
    image: python:3.7
    command:
    - sh
    - -c
    - (PIP_DISABLE_PIP_VERSION_CHECK=1 python3 -m pip install --quiet --no-warn-script-location
      'pyarrow==0.17.1' || PIP_DISABLE_PIP_VERSION_CHECK=1 python3 -m pip install
      --quiet --no-warn-script-location 'pyarrow==0.17.1' --user) && "$0" "$@"
    - python3
    - -u
    - -c
    - |
      def _make_parent_dirs_and_return_path(file_path: str):
          import os
          os.makedirs(os.path.dirname(file_path), exist_ok=True)
          return file_path

      def convert_apache_arrow_feather_to_apache_parquet(
          data_path,
          output_data_path,
      ):
          '''Converts Apache Arrow Feather to Apache Parquet.

          [Apache Arrow Feather](https://arrow.apache.org/docs/python/feather.html)
          [Apache Parquet](https://parquet.apache.org/)

          Annotations:
              author: Alexey Volkov <alexey.volkov@ark-kun.com>
          '''
          from pyarrow import feather, parquet

          table = feather.read_table(data_path)
          parquet.write_table(table, output_data_path)

      import argparse
      _parser = argparse.ArgumentParser(prog='Convert apache arrow feather to apache parquet', description='Converts Apache Arrow Feather to Apache Parquet.\n\n [Apache Arrow Feather](https://arrow.apache.org/docs/python/feather.html)\n [Apache Parquet](https://parquet.apache.org/)\n\n Annotations:\n author: Alexey Volkov <alexey.volkov@ark-kun.com>')
      _parser.add_argument("--data", dest="data_path", type=str, required=True, default=argparse.SUPPRESS)
      _parser.add_argument("--output-data", dest="output_data_path", type=_make_parent_dirs_and_return_path, required=True, default=argparse.SUPPRESS)
      _parsed_args = vars(_parser.parse_args())
      _output_files = _parsed_args.pop("_output_paths", [])

      _outputs = convert_apache_arrow_feather_to_apache_parquet(**_parsed_args)

      _output_serializers = [

      ]

      import os
      for idx, output_file in enumerate(_output_files):
          try:
              os.makedirs(os.path.dirname(output_file))
          except OSError:
              pass
          with open(output_file, 'w') as f:
              f.write(_output_serializers[idx](_outputs[idx]))
    args:
    - --data
    - {inputPath: data}
    - --output-data
    - {outputPath: output_data}

@@ -1,30 +0,0 @@
from kfp.components import InputPath, OutputPath, create_component_from_func


def convert_csv_to_apache_parquet(
    data_path: InputPath('CSV'),
    output_data_path: OutputPath('ApacheParquet'),
):
    '''Converts CSV table to Apache Parquet.

    [Apache Parquet](https://parquet.apache.org/)

    Annotations:
        author: Alexey Volkov <alexey.volkov@ark-kun.com>
    '''
    from pyarrow import csv, parquet

    table = csv.read_csv(data_path)
    parquet.write_table(table, output_data_path)


if __name__ == '__main__':
    create_component_from_func(
        convert_csv_to_apache_parquet,
        output_component_file='component.yaml',
        base_image='python:3.7',
        packages_to_install=['pyarrow==0.17.1'],
        annotations={
            "author": "Alexey Volkov <alexey.volkov@ark-kun.com>",
            "canonical_location": "https://raw.githubusercontent.com/Ark-kun/pipeline_components/master/components/_converters/ApacheParquet/from_CSV/component.yaml",
        },
    )

@@ -1,76 +0,0 @@
name: Convert csv to apache parquet
description: |-
  Converts CSV table to Apache Parquet.

  [Apache Parquet](https://parquet.apache.org/)

  Annotations:
      author: Alexey Volkov <alexey.volkov@ark-kun.com>
inputs:
- {name: data, type: CSV}
outputs:
- {name: output_data, type: ApacheParquet}
metadata:
  annotations:
    author: Alexey Volkov <alexey.volkov@ark-kun.com>
    canonical_location: 'https://raw.githubusercontent.com/Ark-kun/pipeline_components/master/components/_converters/ApacheParquet/from_CSV/component.yaml'
implementation:
  container:
    image: python:3.7
    command:
    - sh
    - -c
    - (PIP_DISABLE_PIP_VERSION_CHECK=1 python3 -m pip install --quiet --no-warn-script-location
      'pyarrow==0.17.1' || PIP_DISABLE_PIP_VERSION_CHECK=1 python3 -m pip install
      --quiet --no-warn-script-location 'pyarrow==0.17.1' --user) && "$0" "$@"
    - python3
    - -u
    - -c
    - |
      def _make_parent_dirs_and_return_path(file_path: str):
          import os
          os.makedirs(os.path.dirname(file_path), exist_ok=True)
          return file_path

      def convert_csv_to_apache_parquet(
          data_path,
          output_data_path,
      ):
          '''Converts CSV table to Apache Parquet.

          [Apache Parquet](https://parquet.apache.org/)

          Annotations:
              author: Alexey Volkov <alexey.volkov@ark-kun.com>
          '''
          from pyarrow import csv, parquet

          table = csv.read_csv(data_path)
          parquet.write_table(table, output_data_path)

      import argparse
      _parser = argparse.ArgumentParser(prog='Convert csv to apache parquet', description='Converts CSV table to Apache Parquet.\n\n [Apache Parquet](https://parquet.apache.org/)\n\n Annotations:\n author: Alexey Volkov <alexey.volkov@ark-kun.com>')
      _parser.add_argument("--data", dest="data_path", type=str, required=True, default=argparse.SUPPRESS)
      _parser.add_argument("--output-data", dest="output_data_path", type=_make_parent_dirs_and_return_path, required=True, default=argparse.SUPPRESS)
      _parsed_args = vars(_parser.parse_args())
      _output_files = _parsed_args.pop("_output_paths", [])

      _outputs = convert_csv_to_apache_parquet(**_parsed_args)

      _output_serializers = [

      ]

      import os
      for idx, output_file in enumerate(_output_files):
          try:
              os.makedirs(os.path.dirname(output_file))
          except OSError:
              pass
          with open(output_file, 'w') as f:
              f.write(_output_serializers[idx](_outputs[idx]))
    args:
    - --data
    - {inputPath: data}
    - --output-data
    - {outputPath: output_data}

@@ -1,30 +0,0 @@
from kfp.components import InputPath, OutputPath, create_component_from_func


def convert_tsv_to_apache_parquet(
    data_path: InputPath('TSV'),
    output_data_path: OutputPath('ApacheParquet'),
):
    '''Converts TSV table to Apache Parquet.

    [Apache Parquet](https://parquet.apache.org/)

    Annotations:
        author: Alexey Volkov <alexey.volkov@ark-kun.com>
    '''
    from pyarrow import csv, parquet

    table = csv.read_csv(data_path, parse_options=csv.ParseOptions(delimiter='\t'))
    parquet.write_table(table, output_data_path)


if __name__ == '__main__':
    create_component_from_func(
        convert_tsv_to_apache_parquet,
        output_component_file='component.yaml',
        base_image='python:3.7',
        packages_to_install=['pyarrow==0.17.1'],
        annotations={
            "author": "Alexey Volkov <alexey.volkov@ark-kun.com>",
            "canonical_location": "https://raw.githubusercontent.com/Ark-kun/pipeline_components/master/components/_converters/ApacheParquet/from_TSV/component.yaml",
        },
    )

@@ -1,76 +0,0 @@
name: Convert tsv to apache parquet
description: |-
  Converts TSV table to Apache Parquet.

  [Apache Parquet](https://parquet.apache.org/)

  Annotations:
      author: Alexey Volkov <alexey.volkov@ark-kun.com>
inputs:
- {name: data, type: TSV}
outputs:
- {name: output_data, type: ApacheParquet}
metadata:
  annotations:
    author: Alexey Volkov <alexey.volkov@ark-kun.com>
    canonical_location: 'https://raw.githubusercontent.com/Ark-kun/pipeline_components/master/components/_converters/ApacheParquet/from_TSV/component.yaml'
implementation:
  container:
    image: python:3.7
    command:
    - sh
    - -c
    - (PIP_DISABLE_PIP_VERSION_CHECK=1 python3 -m pip install --quiet --no-warn-script-location
      'pyarrow==0.17.1' || PIP_DISABLE_PIP_VERSION_CHECK=1 python3 -m pip install
      --quiet --no-warn-script-location 'pyarrow==0.17.1' --user) && "$0" "$@"
    - python3
    - -u
    - -c
    - |
      def _make_parent_dirs_and_return_path(file_path: str):
          import os
          os.makedirs(os.path.dirname(file_path), exist_ok=True)
          return file_path

      def convert_tsv_to_apache_parquet(
          data_path,
          output_data_path,
      ):
          '''Converts TSV table to Apache Parquet.

          [Apache Parquet](https://parquet.apache.org/)

          Annotations:
              author: Alexey Volkov <alexey.volkov@ark-kun.com>
          '''
          from pyarrow import csv, parquet

          table = csv.read_csv(data_path, parse_options=csv.ParseOptions(delimiter='\t'))
          parquet.write_table(table, output_data_path)

      import argparse
      _parser = argparse.ArgumentParser(prog='Convert tsv to apache parquet', description='Converts TSV table to Apache Parquet.\n\n [Apache Parquet](https://parquet.apache.org/)\n\n Annotations:\n author: Alexey Volkov <alexey.volkov@ark-kun.com>')
      _parser.add_argument("--data", dest="data_path", type=str, required=True, default=argparse.SUPPRESS)
      _parser.add_argument("--output-data", dest="output_data_path", type=_make_parent_dirs_and_return_path, required=True, default=argparse.SUPPRESS)
      _parsed_args = vars(_parser.parse_args())
      _output_files = _parsed_args.pop("_output_paths", [])

      _outputs = convert_tsv_to_apache_parquet(**_parsed_args)

      _output_serializers = [

      ]

      import os
      for idx, output_file in enumerate(_output_files):
          try:
              os.makedirs(os.path.dirname(output_file))
          except OSError:
              pass
          with open(output_file, 'w') as f:
              f.write(_output_serializers[idx](_outputs[idx]))
    args:
    - --data
    - {inputPath: data}
    - --output-data
    - {outputPath: output_data}

@@ -1,31 +0,0 @@
from kfp.components import InputPath, OutputPath, create_component_from_func


def convert_apache_parquet_to_apache_arrow_feather(
    data_path: InputPath('ApacheParquet'),
    output_data_path: OutputPath('ApacheArrowFeather'),
):
    '''Converts Apache Parquet to Apache Arrow Feather.

    [Apache Arrow Feather](https://arrow.apache.org/docs/python/feather.html)
    [Apache Parquet](https://parquet.apache.org/)

    Annotations:
        author: Alexey Volkov <alexey.volkov@ark-kun.com>
    '''
    from pyarrow import feather, parquet

    data_frame = parquet.read_pandas(data_path).to_pandas()
    feather.write_feather(data_frame, output_data_path)


if __name__ == '__main__':
    convert_apache_parquet_to_apache_arrow_feather_op = create_component_from_func(
        convert_apache_parquet_to_apache_arrow_feather,
        output_component_file='component.yaml',
        base_image='python:3.7',
        packages_to_install=['pyarrow==0.17.1', 'pandas==1.0.3'],
        annotations={
            "author": "Alexey Volkov <alexey.volkov@ark-kun.com>",
            "canonical_location": "https://raw.githubusercontent.com/Ark-kun/pipeline_components/master/components/_converters/ApacheParquet/to_ApacheArrowFeather/component.yaml",
        },
    )

@@ -1,79 +0,0 @@
name: Convert apache parquet to apache arrow feather
description: |-
  Converts Apache Parquet to Apache Arrow Feather.

  [Apache Arrow Feather](https://arrow.apache.org/docs/python/feather.html)
  [Apache Parquet](https://parquet.apache.org/)

  Annotations:
      author: Alexey Volkov <alexey.volkov@ark-kun.com>
inputs:
- {name: data, type: ApacheParquet}
outputs:
- {name: output_data, type: ApacheArrowFeather}
metadata:
  annotations:
    author: Alexey Volkov <alexey.volkov@ark-kun.com>
    canonical_location: 'https://raw.githubusercontent.com/Ark-kun/pipeline_components/master/components/_converters/ApacheParquet/to_ApacheArrowFeather/component.yaml'
implementation:
  container:
    image: python:3.7
    command:
    - sh
    - -c
    - (PIP_DISABLE_PIP_VERSION_CHECK=1 python3 -m pip install --quiet --no-warn-script-location
      'pyarrow==0.17.1' 'pandas==1.0.3' || PIP_DISABLE_PIP_VERSION_CHECK=1 python3
      -m pip install --quiet --no-warn-script-location 'pyarrow==0.17.1' 'pandas==1.0.3'
      --user) && "$0" "$@"
    - python3
    - -u
    - -c
    - |
      def _make_parent_dirs_and_return_path(file_path: str):
          import os
          os.makedirs(os.path.dirname(file_path), exist_ok=True)
          return file_path

      def convert_apache_parquet_to_apache_arrow_feather(
          data_path,
          output_data_path,
      ):
          '''Converts Apache Parquet to Apache Arrow Feather.

          [Apache Arrow Feather](https://arrow.apache.org/docs/python/feather.html)
          [Apache Parquet](https://parquet.apache.org/)

          Annotations:
              author: Alexey Volkov <alexey.volkov@ark-kun.com>
          '''
          from pyarrow import feather, parquet

          data_frame = parquet.read_pandas(data_path).to_pandas()
          feather.write_feather(data_frame, output_data_path)

      import argparse
      _parser = argparse.ArgumentParser(prog='Convert apache parquet to apache arrow feather', description='Converts Apache Parquet to Apache Arrow Feather.\n\n [Apache Arrow Feather](https://arrow.apache.org/docs/python/feather.html)\n [Apache Parquet](https://parquet.apache.org/)\n\n Annotations:\n author: Alexey Volkov <alexey.volkov@ark-kun.com>')
      _parser.add_argument("--data", dest="data_path", type=str, required=True, default=argparse.SUPPRESS)
      _parser.add_argument("--output-data", dest="output_data_path", type=_make_parent_dirs_and_return_path, required=True, default=argparse.SUPPRESS)
      _parsed_args = vars(_parser.parse_args())
      _output_files = _parsed_args.pop("_output_paths", [])

      _outputs = convert_apache_parquet_to_apache_arrow_feather(**_parsed_args)

      _output_serializers = [

      ]

      import os
      for idx, output_file in enumerate(_output_files):
          try:
              os.makedirs(os.path.dirname(output_file))
          except OSError:
              pass
          with open(output_file, 'w') as f:
              f.write(_output_serializers[idx](_outputs[idx]))
    args:
    - --data
    - {inputPath: data}
    - --output-data
    - {outputPath: output_data}

@@ -1,33 +0,0 @@
from kfp.components import InputPath, OutputPath, create_component_from_func


def convert_apache_parquet_to_csv(
    data_path: InputPath('ApacheParquet'),
    output_data_path: OutputPath('CSV'),
):
    '''Converts Apache Parquet to CSV.

    [Apache Parquet](https://parquet.apache.org/)

    Annotations:
        author: Alexey Volkov <alexey.volkov@ark-kun.com>
    '''
    from pyarrow import parquet

    data_frame = parquet.read_pandas(data_path).to_pandas()
    data_frame.to_csv(
        output_data_path,
        index=False,
    )


if __name__ == '__main__':
    convert_apache_parquet_to_csv_op = create_component_from_func(
        convert_apache_parquet_to_csv,
        output_component_file='component.yaml',
        base_image='python:3.7',
        packages_to_install=['pyarrow==0.17.1', 'pandas==1.0.3'],
        annotations={
            "author": "Alexey Volkov <alexey.volkov@ark-kun.com>",
            "canonical_location": "https://raw.githubusercontent.com/Ark-kun/pipeline_components/master/components/_converters/ApacheParquet/to_CSV/component.yaml",
        },
    )

@@ -1,66 +0,0 @@
name: Convert apache parquet to csv
description: |-
  Converts Apache Parquet to CSV.

  [Apache Parquet](https://parquet.apache.org/)

  Annotations:
      author: Alexey Volkov <alexey.volkov@ark-kun.com>
inputs:
- {name: data, type: ApacheParquet}
outputs:
- {name: output_data, type: CSV}
metadata:
  annotations:
    author: Alexey Volkov <alexey.volkov@ark-kun.com>
    canonical_location: 'https://raw.githubusercontent.com/Ark-kun/pipeline_components/master/components/_converters/ApacheParquet/to_CSV/component.yaml'
implementation:
  container:
    image: python:3.7
    command:
    - sh
    - -c
    - (PIP_DISABLE_PIP_VERSION_CHECK=1 python3 -m pip install --quiet --no-warn-script-location
      'pyarrow==0.17.1' 'pandas==1.0.3' || PIP_DISABLE_PIP_VERSION_CHECK=1 python3
      -m pip install --quiet --no-warn-script-location 'pyarrow==0.17.1' 'pandas==1.0.3'
      --user) && "$0" "$@"
    - python3
    - -u
    - -c
    - |
      def _make_parent_dirs_and_return_path(file_path: str):
          import os
          os.makedirs(os.path.dirname(file_path), exist_ok=True)
          return file_path

      def convert_apache_parquet_to_csv(
          data_path,
          output_data_path,
      ):
          '''Converts Apache Parquet to CSV.

          [Apache Parquet](https://parquet.apache.org/)

          Annotations:
              author: Alexey Volkov <alexey.volkov@ark-kun.com>
          '''
          from pyarrow import parquet

          data_frame = parquet.read_pandas(data_path).to_pandas()
          data_frame.to_csv(
              output_data_path,
              index=False,
          )

      import argparse
      _parser = argparse.ArgumentParser(prog='Convert apache parquet to csv', description='Converts Apache Parquet to CSV.\n\n [Apache Parquet](https://parquet.apache.org/)\n\n Annotations:\n author: Alexey Volkov <alexey.volkov@ark-kun.com>')
      _parser.add_argument("--data", dest="data_path", type=str, required=True, default=argparse.SUPPRESS)
      _parser.add_argument("--output-data", dest="output_data_path", type=_make_parent_dirs_and_return_path, required=True, default=argparse.SUPPRESS)
      _parsed_args = vars(_parser.parse_args())

      _outputs = convert_apache_parquet_to_csv(**_parsed_args)
    args:
    - --data
    - {inputPath: data}
    - --output-data
    - {outputPath: output_data}

@@ -1,34 +0,0 @@
from kfp.components import InputPath, OutputPath, create_component_from_func


def convert_apache_parquet_to_tsv(
    data_path: InputPath('ApacheParquet'),
    output_data_path: OutputPath('TSV'),
):
    '''Converts Apache Parquet to TSV.

    [Apache Parquet](https://parquet.apache.org/)

    Annotations:
        author: Alexey Volkov <alexey.volkov@ark-kun.com>
    '''
    from pyarrow import parquet

    data_frame = parquet.read_pandas(data_path).to_pandas()
    data_frame.to_csv(
        output_data_path,
        index=False,
        sep='\t',
    )


if __name__ == '__main__':
    convert_apache_parquet_to_tsv_op = create_component_from_func(
        convert_apache_parquet_to_tsv,
        output_component_file='component.yaml',
        base_image='python:3.7',
        packages_to_install=['pyarrow==0.17.1', 'pandas==1.0.3'],
        annotations={
            "author": "Alexey Volkov <alexey.volkov@ark-kun.com>",
            "canonical_location": "https://raw.githubusercontent.com/Ark-kun/pipeline_components/master/components/_converters/ApacheParquet/to_TSV/component.yaml",
        },
    )

@@ -1,67 +0,0 @@
name: Convert apache parquet to tsv
description: |-
  Converts Apache Parquet to TSV.

  [Apache Parquet](https://parquet.apache.org/)

  Annotations:
      author: Alexey Volkov <alexey.volkov@ark-kun.com>
inputs:
- {name: data, type: ApacheParquet}
outputs:
- {name: output_data, type: TSV}
metadata:
  annotations:
    author: Alexey Volkov <alexey.volkov@ark-kun.com>
    canonical_location: 'https://raw.githubusercontent.com/Ark-kun/pipeline_components/master/components/_converters/ApacheParquet/to_TSV/component.yaml'
implementation:
  container:
    image: python:3.7
    command:
    - sh
    - -c
    - (PIP_DISABLE_PIP_VERSION_CHECK=1 python3 -m pip install --quiet --no-warn-script-location
      'pyarrow==0.17.1' 'pandas==1.0.3' || PIP_DISABLE_PIP_VERSION_CHECK=1 python3
      -m pip install --quiet --no-warn-script-location 'pyarrow==0.17.1' 'pandas==1.0.3'
      --user) && "$0" "$@"
    - python3
    - -u
    - -c
    - |
      def _make_parent_dirs_and_return_path(file_path: str):
          import os
          os.makedirs(os.path.dirname(file_path), exist_ok=True)
          return file_path

      def convert_apache_parquet_to_tsv(
          data_path,
          output_data_path,
      ):
          '''Converts Apache Parquet to TSV.

          [Apache Parquet](https://parquet.apache.org/)

          Annotations:
              author: Alexey Volkov <alexey.volkov@ark-kun.com>
          '''
          from pyarrow import parquet

          data_frame = parquet.read_pandas(data_path).to_pandas()
          data_frame.to_csv(
              output_data_path,
              index=False,
              sep='\t',
          )

      import argparse
      _parser = argparse.ArgumentParser(prog='Convert apache parquet to tsv', description='Converts Apache Parquet to TSV.\n\n [Apache Parquet](https://parquet.apache.org/)\n\n Annotations:\n author: Alexey Volkov <alexey.volkov@ark-kun.com>')
      _parser.add_argument("--data", dest="data_path", type=str, required=True, default=argparse.SUPPRESS)
      _parser.add_argument("--output-data", dest="output_data_path", type=_make_parent_dirs_and_return_path, required=True, default=argparse.SUPPRESS)
      _parsed_args = vars(_parser.parse_args())

      _outputs = convert_apache_parquet_to_tsv(**_parsed_args)
    args:
    - --data
    - {inputPath: data}
    - --output-data
    - {outputPath: output_data}

@@ -1,33 +0,0 @@
from kfp.components import create_component_from_func, InputPath, OutputPath
|
||||
|
||||
def keras_convert_hdf5_model_to_tf_saved_model(
|
||||
model_path: InputPath('KerasModelHdf5'),
|
||||
converted_model_path: OutputPath('TensorflowSavedModel'),
|
||||
):
|
||||
'''Converts Keras HDF5 model to Tensorflow SavedModel format.
|
||||
|
||||
Args:
|
||||
model_path: Keras model in HDF5 format.
|
||||
converted_model_path: Keras model in Tensorflow SavedModel format.
|
||||
|
||||
Annotations:
|
||||
author: Alexey Volkov <alexey.volkov@ark-kun.com>
|
||||
'''
|
||||
from pathlib import Path
|
||||
from tensorflow import keras
|
||||
|
||||
model = keras.models.load_model(filepath=model_path)
|
||||
keras.models.save_model(model=model, filepath=converted_model_path, save_format='tf')
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
keras_convert_hdf5_model_to_tf_saved_model_op = create_component_from_func(
|
||||
keras_convert_hdf5_model_to_tf_saved_model,
|
||||
base_image='tensorflow/tensorflow:2.3.0',
|
||||
packages_to_install=['h5py==2.10.0'],
|
||||
output_component_file='component.yaml',
|
||||
annotations={
|
||||
"author": "Alexey Volkov <alexey.volkov@ark-kun.com>",
|
||||
"canonical_location": "https://raw.githubusercontent.com/Ark-kun/pipeline_components/master/components/_converters/KerasModelHdf5/to_TensorflowSavedModel/component.yaml",
|
||||
},
|
||||
)
|
||||
|
|
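A standalone sketch of the same conversion outside a pipeline, assuming TensorFlow 2.x is installed; 'model.h5' and 'saved_model_dir' are placeholder paths:

from tensorflow import keras

model = keras.models.load_model(filepath='model.h5')  # read the HDF5 model
keras.models.save_model(model=model, filepath='saved_model_dir', save_format='tf')  # write SavedModel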
@@ -1,59 +0,0 @@
name: Keras convert hdf5 model to tf saved model
description: Converts Keras HDF5 model to Tensorflow SavedModel format.
inputs:
- {name: model, type: KerasModelHdf5, description: Keras model in HDF5 format.}
outputs:
- {name: converted_model, type: TensorflowSavedModel, description: Keras model in Tensorflow SavedModel format.}
metadata:
  annotations:
    author: Alexey Volkov <alexey.volkov@ark-kun.com>
    canonical_location: 'https://raw.githubusercontent.com/Ark-kun/pipeline_components/master/components/_converters/KerasModelHdf5/to_TensorflowSavedModel/component.yaml'
implementation:
  container:
    image: tensorflow/tensorflow:2.3.0
    command:
    - sh
    - -c
    - (PIP_DISABLE_PIP_VERSION_CHECK=1 python3 -m pip install --quiet --no-warn-script-location
      'h5py==2.10.0' || PIP_DISABLE_PIP_VERSION_CHECK=1 python3 -m pip install --quiet
      --no-warn-script-location 'h5py==2.10.0' --user) && "$0" "$@"
    - python3
    - -u
    - -c
    - |
      def _make_parent_dirs_and_return_path(file_path: str):
          import os
          os.makedirs(os.path.dirname(file_path), exist_ok=True)
          return file_path

      def keras_convert_hdf5_model_to_tf_saved_model(
          model_path,
          converted_model_path,
      ):
          '''Converts Keras HDF5 model to Tensorflow SavedModel format.

          Args:
              model_path: Keras model in HDF5 format.
              converted_model_path: Keras model in Tensorflow SavedModel format.

          Annotations:
              author: Alexey Volkov <alexey.volkov@ark-kun.com>
          '''
          from pathlib import Path
          from tensorflow import keras

          model = keras.models.load_model(filepath=model_path)
          keras.models.save_model(model=model, filepath=converted_model_path, save_format='tf')

      import argparse
      _parser = argparse.ArgumentParser(prog='Keras convert hdf5 model to tf saved model', description='Converts Keras HDF5 model to Tensorflow SavedModel format.')
      _parser.add_argument("--model", dest="model_path", type=str, required=True, default=argparse.SUPPRESS)
      _parser.add_argument("--converted-model", dest="converted_model_path", type=_make_parent_dirs_and_return_path, required=True, default=argparse.SUPPRESS)
      _parsed_args = vars(_parser.parse_args())

      _outputs = keras_convert_hdf5_model_to_tf_saved_model(**_parsed_args)
    args:
    - --model
    - {inputPath: model}
    - --converted-model
    - {outputPath: converted_model}
@@ -1,25 +0,0 @@
name: To ONNX from Keras HDF5 model
inputs:
- {name: Model, type: KerasModelHdf5}
outputs:
- {name: Model, type: OnnxModel}
metadata:
  annotations:
    author: Alexey Volkov <alexey.volkov@ark-kun.com>
    canonical_location: 'https://raw.githubusercontent.com/Ark-kun/pipeline_components/master/components/_converters/OnnxModel/from_KerasModelHdf5/component.yaml'
implementation:
  container:
    image: tensorflow/tensorflow:2.3.0
    command:
    - sh
    - -exc
    - python3 -m pip install tf2onnx==1.6.3 && "$0" "$@"
    - python3
    - -m
    - tf2onnx.convert
    - --keras
    - {inputPath: Model}
    - --output
    - {outputPath: Model}
    - --fold_const
    - --verbose
@@ -1,25 +0,0 @@
name: To ONNX from Tensorflow SavedModel
inputs:
- {name: Model, type: TensorflowSavedModel}
outputs:
- {name: Model, type: OnnxModel}
metadata:
  annotations:
    author: Alexey Volkov <alexey.volkov@ark-kun.com>
    canonical_location: 'https://raw.githubusercontent.com/Ark-kun/pipeline_components/master/components/_converters/OnnxModel/from_TensorflowSavedModel/component.yaml'
implementation:
  container:
    image: tensorflow/tensorflow:2.3.0
    command:
    - sh
    - -exc
    - python3 -m pip install tf2onnx==1.6.3 && "$0" "$@"
    - python3
    - -m
    - tf2onnx.convert
    - --saved-model
    - {inputPath: Model}
    - --output
    - {outputPath: Model}
    - --fold_const
    - --verbose
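Both ONNX export components wrap the tf2onnx command line. A sketch of the equivalent local invocation, assuming tf2onnx==1.6.3 is installed; file names are placeholders:

import subprocess

subprocess.run(
    ['python3', '-m', 'tf2onnx.convert',
     '--saved-model', 'saved_model_dir',  # or: '--keras', 'model.h5'
     '--output', 'model.onnx',
     '--fold_const', '--verbose'],
    check=True,
)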
@@ -1,26 +0,0 @@
from kfp.components import create_component_from_func, InputPath, OutputPath


def convert_to_tensorflow_saved_model_from_onnx_model(
    model_path: InputPath('OnnxModel'),
    converted_model_path: OutputPath('TensorflowSavedModel'),
):
    import onnx
    import onnx_tf

    onnx_model = onnx.load(model_path)
    tf_rep = onnx_tf.backend.prepare(onnx_model)
    tf_rep.export_graph(converted_model_path)


if __name__ == '__main__':
    convert_to_tensorflow_saved_model_from_onnx_model_op = create_component_from_func(
        convert_to_tensorflow_saved_model_from_onnx_model,
        output_component_file='component.yaml',
        base_image='tensorflow/tensorflow:2.4.1',
        packages_to_install=['onnx-tf==1.7.0', 'onnx==1.8.0'],  # onnx-tf==1.7.0 is not compatible with onnx==1.8.1
        annotations={
            "author": "Alexey Volkov <alexey.volkov@ark-kun.com>",
            "canonical_location": "https://raw.githubusercontent.com/Ark-kun/pipeline_components/master/components/_converters/OnnxModel/to_TensorflowSavedModel/component.yaml",
        },
    )
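A local sketch of the conversion above, with an added graph-validation step; it assumes onnx==1.8.0 and onnx-tf==1.7.0 (the pins used by the component) and placeholder paths:

import onnx
import onnx_tf

onnx_model = onnx.load('model.onnx')
onnx.checker.check_model(onnx_model)  # validate the graph before converting
tf_rep = onnx_tf.backend.prepare(onnx_model)
tf_rep.export_graph('saved_model_dir')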
@@ -1,54 +0,0 @@
name: Convert to tensorflow saved model from onnx model
metadata:
  annotations:
    author: Alexey Volkov <alexey.volkov@ark-kun.com>
    canonical_location: 'https://raw.githubusercontent.com/Ark-kun/pipeline_components/master/components/_converters/OnnxModel/to_TensorflowSavedModel/component.yaml'
inputs:
- {name: model, type: OnnxModel}
outputs:
- {name: converted_model, type: TensorflowModel}
implementation:
  container:
    image: tensorflow/tensorflow:2.4.1
    command:
    - sh
    - -c
    - (PIP_DISABLE_PIP_VERSION_CHECK=1 python3 -m pip install --quiet --no-warn-script-location
      'onnx-tf==1.7.0' 'onnx==1.8.0' || PIP_DISABLE_PIP_VERSION_CHECK=1 python3 -m
      pip install --quiet --no-warn-script-location 'onnx-tf==1.7.0' 'onnx==1.8.0'
      --user) && "$0" "$@"
    - sh
    - -ec
    - |
      program_path=$(mktemp)
      printf "%s" "$0" > "$program_path"
      python3 -u "$program_path" "$@"
    - |
      def _make_parent_dirs_and_return_path(file_path: str):
          import os
          os.makedirs(os.path.dirname(file_path), exist_ok=True)
          return file_path

      def convert_to_tensorflow_saved_model_from_onnx_model(
          model_path,
          converted_model_path,
      ):
          import onnx
          import onnx_tf

          onnx_model = onnx.load(model_path)
          tf_rep = onnx_tf.backend.prepare(onnx_model)
          tf_rep.export_graph(converted_model_path)

      import argparse
      _parser = argparse.ArgumentParser(prog='Convert to tensorflow saved model from onnx model', description='')
      _parser.add_argument("--model", dest="model_path", type=str, required=True, default=argparse.SUPPRESS)
      _parser.add_argument("--converted-model", dest="converted_model_path", type=_make_parent_dirs_and_return_path, required=True, default=argparse.SUPPRESS)
      _parsed_args = vars(_parser.parse_args())

      _outputs = convert_to_tensorflow_saved_model_from_onnx_model(**_parsed_args)
    args:
    - --model
    - {inputPath: model}
    - --converted-model
    - {outputPath: converted_model}
@@ -1,3 +0,0 @@
# Deprecation Warning

The components in this directory have been moved to [components/contrib/_converters](https://github.com/kubeflow/pipelines/tree/master/components/contrib/_converters). This directory will be removed by the end of 2021.
@@ -1,25 +0,0 @@
name: Convert Keras HDF5 model to Tensorflow JS GraphModel
inputs:
- {name: Model, type: KerasModelHdf5}
outputs:
- {name: Model, type: TensorflowJSGraphModel}
metadata:
  annotations:
    author: Alexey Volkov <alexey.volkov@ark-kun.com>
    canonical_location: 'https://raw.githubusercontent.com/Ark-kun/pipeline_components/master/components/_converters/TensorflowJSGraphModel/from_KerasModelHdf5/component.yaml'
implementation:
  container:
    image: tensorflow/tensorflow:2.3.0
    command:
    - sh
    - -exc
    - |
      # Manually installing prerequisites so that tensorflowjs does not re-install tensorflow-cpu on top of tensorflow. See https://github.com/tensorflow/tfjs/issues/3953
      python3 -m pip install --quiet 'h5py>=2.8.0' 'numpy>=1.16.4,<1.19.0' 'six>=1.12.0' 'tensorflow-hub==0.7.0' 'PyInquirer==1.0.3'
      python3 -m pip install --quiet tensorflowjs==2.4.0 --no-dependencies
      "$0" "$*"
    - tensorflowjs_converter
    - --input_format=keras
    - --output_format=tfjs_graph_model
    - inputPath: Model
    - outputPath: Model
@@ -1,25 +0,0 @@
name: Convert Tensorflow SavedModel to Tensorflow JS GraphModel
inputs:
- {name: Model, type: TensorflowSavedModel}
outputs:
- {name: Model, type: TensorflowJSGraphModel}
metadata:
  annotations:
    author: Alexey Volkov <alexey.volkov@ark-kun.com>
    canonical_location: 'https://raw.githubusercontent.com/Ark-kun/pipeline_components/master/components/_converters/TensorflowJSGraphModel/from_TensorflowSavedModel/component.yaml'
implementation:
  container:
    image: tensorflow/tensorflow:2.3.0
    command:
    - sh
    - -exc
    - |
      # Manually installing prerequisites so that tensorflowjs does not re-install tensorflow-cpu on top of tensorflow. See https://github.com/tensorflow/tfjs/issues/3953
      python3 -m pip install --quiet 'h5py>=2.8.0' 'numpy>=1.16.4,<1.19.0' 'six>=1.12.0' 'tensorflow-hub==0.7.0' 'PyInquirer==1.0.3'
      python3 -m pip install --quiet tensorflowjs==2.4.0 --no-dependencies
      "$0" "$*"
    - tensorflowjs_converter
    - --input_format=tf_saved_model
    - --output_format=tfjs_graph_model
    - inputPath: Model
    - outputPath: Model
@@ -1,25 +0,0 @@
name: Convert Keras HDF5 model to Tensorflow JS LayersModel
inputs:
- {name: Model, type: KerasModelHdf5}
outputs:
- {name: Model, type: TensorflowJSLayersModel}
metadata:
  annotations:
    author: Alexey Volkov <alexey.volkov@ark-kun.com>
    canonical_location: 'https://raw.githubusercontent.com/Ark-kun/pipeline_components/master/components/_converters/TensorflowJSLayersModel/from_KerasModelHdf5/component.yaml'
implementation:
  container:
    image: tensorflow/tensorflow:2.3.0
    command:
    - sh
    - -exc
    - |
      # Manually installing prerequisites so that tensorflowjs does not re-install tensorflow-cpu on top of tensorflow. See https://github.com/tensorflow/tfjs/issues/3953
      python3 -m pip install --quiet 'h5py>=2.8.0' 'numpy>=1.16.4,<1.19.0' 'six>=1.12.0' 'tensorflow-hub==0.7.0' 'PyInquirer==1.0.3'
      python3 -m pip install --quiet tensorflowjs==2.4.0 --no-dependencies
      "$0" "$*"
    - tensorflowjs_converter
    - --input_format=keras
    - --output_format=tfjs_layers_model
    - inputPath: Model
    - outputPath: Model
@@ -1,25 +0,0 @@
name: Convert Keras SavedModel to Tensorflow JS LayersModel
inputs:
- {name: Model, type: TensorflowSavedModel}
outputs:
- {name: Model, type: TensorflowJSLayersModel}
metadata:
  annotations:
    author: Alexey Volkov <alexey.volkov@ark-kun.com>
    canonical_location: 'https://raw.githubusercontent.com/Ark-kun/pipeline_components/master/components/_converters/TensorflowJSLayersModel/from_TensorflowSavedModel/component.yaml'
implementation:
  container:
    image: tensorflow/tensorflow:2.3.0
    command:
    - sh
    - -exc
    - |
      # Manually installing prerequisites so that tensorflowjs does not re-install tensorflow-cpu on top of tensorflow. See https://github.com/tensorflow/tfjs/issues/3953
      python3 -m pip install --quiet 'h5py>=2.8.0' 'numpy>=1.16.4,<1.19.0' 'six>=1.12.0' 'tensorflow-hub==0.7.0' 'PyInquirer==1.0.3'
      python3 -m pip install --quiet tensorflowjs==2.4.0 --no-dependencies
      "$0" "$*"
    - tensorflowjs_converter
    - --input_format=keras_saved_model
    - --output_format=tfjs_layers_model
    - inputPath: Model
    - outputPath: Model
@@ -1,23 +0,0 @@
name: Convert Keras HDF5 model to Tensorflow Lite model
inputs:
- {name: Model, type: KerasModelHdf5}
outputs:
- {name: Model, type: TensorflowLiteModel}
metadata:
  annotations:
    author: Alexey Volkov <alexey.volkov@ark-kun.com>
    canonical_location: 'https://raw.githubusercontent.com/Ark-kun/pipeline_components/master/components/_converters/TensorflowLiteModel/from_KerasModelHdf5/component.yaml'
implementation:
  container:
    image: tensorflow/tensorflow:2.3.0
    command:
    - sh
    - -exc
    - |
      model_path="$0"
      output_model_path="$1"
      mkdir -p "$(dirname "$output_model_path")"

      tflite_convert --keras_model_file "$model_path" --output_file "$output_model_path"
    - {inputPath: Model}
    - {outputPath: Model}
@@ -1,23 +0,0 @@
name: Convert Tensorflow SavedModel to Tensorflow Lite model
inputs:
- {name: Model, type: TensorflowSavedModel}
outputs:
- {name: Model, type: TensorflowLiteModel}
metadata:
  annotations:
    author: Alexey Volkov <alexey.volkov@ark-kun.com>
    canonical_location: 'https://raw.githubusercontent.com/Ark-kun/pipeline_components/master/components/_converters/TensorflowLiteModel/from_TensorflowSavedModel/component.yaml'
implementation:
  container:
    image: tensorflow/tensorflow:2.3.0
    command:
    - sh
    - -exc
    - |
      model_path="$0"
      output_model_path="$1"
      mkdir -p "$(dirname "$output_model_path")"

      tflite_convert --saved_model_dir "$model_path" --output_file "$output_model_path"
    - {inputPath: Model}
    - {outputPath: Model}
@@ -1,44 +0,0 @@
name: Calculate data hash
inputs:
- {name: Data}
- {name: Hash algorithm, type: String, default: SHA256, description: "Hash algorithm to use. Supported values are MD5, SHA1, SHA256, SHA512, SHA3"}
outputs:
- {name: Hash}
metadata:
  annotations:
    author: Alexey Volkov <alexey.volkov@ark-kun.com>
    canonical_location: 'https://raw.githubusercontent.com/Ark-kun/pipeline_components/master/components/basics/Calculate_hash/component.yaml'
implementation:
  container:
    image: alpine
    command:
    - sh
    - -exc
    - |
      data_path="$0"
      hash_algorithm="$1"
      hash_path="$2"
      mkdir -p "$(dirname "$hash_path")"

      hash_algorithm=$(echo "$hash_algorithm" | tr '[:upper:]' '[:lower:]')
      case "$hash_algorithm" in
          md5|sha1|sha256|sha512|sha3) hash_program="${hash_algorithm}sum";;
          *) echo "Unsupported hash algorithm $hash_algorithm"; exit 1;;
      esac

      if [ -d "$data_path" ]; then
          # Calculating hash for directory
          cd "$data_path"
          find . -type f -print0 |
              sort -z |
              xargs -0 "$hash_program" |
              "$hash_program" |
              cut -d ' ' -f 1 > "$hash_path"
      else
          # Calculating hash for file
          "$hash_program" "$data_path" |
              cut -d ' ' -f 1 > "$hash_path"
      fi
    - {inputPath: Data}
    - {inputValue: Hash algorithm}
    - {outputPath: Hash}
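A Python sketch of the directory-hashing scheme used above: hash each file, sort by path, then hash the list of per-file digests. hashlib stands in for the *sum tools, so the exact digest text differs slightly from the component's output:

import hashlib
from pathlib import Path

def hash_tree(data_path: str, algorithm: str = 'sha256') -> str:
    path = Path(data_path)
    if path.is_file():
        return hashlib.new(algorithm, path.read_bytes()).hexdigest()
    outer = hashlib.new(algorithm)
    # Deterministic ordering mirrors the `find ... | sort -z` step.
    for file in sorted(p for p in path.rglob('*') if p.is_file()):
        digest = hashlib.new(algorithm, file.read_bytes()).hexdigest()
        outer.update(f'{digest}  ./{file.relative_to(path)}\n'.encode())
    return outer.hexdigest()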
@@ -1,3 +0,0 @@
# Deprecation Warning

The components in this directory have been moved to [components/contrib/basics](https://github.com/kubeflow/pipelines/tree/master/components/contrib/basics). This directory will be removed by the end of 2021.
@@ -1,59 +0,0 @@
#!/bin/bash -e
# Copyright 2018 The Kubeflow Authors
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#      http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

while getopts ":hp:t:i:l:" opt; do
  case "${opt}" in
    h) echo "-p: project name"
       echo "-t: tag name"
       echo "-i: image name. If provided, project name and tag name are not necessary"
       echo "-l: local image name."
       exit
      ;;
    p) PROJECT_ID=${OPTARG}
      ;;
    t) TAG_NAME=${OPTARG}
      ;;
    i) IMAGE_NAME=${OPTARG}
      ;;
    l) LOCAL_IMAGE_NAME=${OPTARG}
      ;;
    \? ) echo "Usage: cmd [-p] project [-t] tag [-i] image [-l] local image"
      exit
      ;;
  esac
done

if [ -z "${PROJECT_ID}" ]; then
  PROJECT_ID=$(gcloud config config-helper --format "value(configuration.properties.core.project)")
fi

if [ -z "${TAG_NAME}" ]; then
  TAG_NAME=$(date +v%Y%m%d)-$(git describe --tags --always --dirty)-$(git diff | shasum -a256 | cut -c -6)
fi

if [ -z "${IMAGE_NAME}" ]; then
  docker pull gcr.io/${PROJECT_ID}/${LOCAL_IMAGE_NAME}:latest || true
fi

docker build -t ${LOCAL_IMAGE_NAME} . --cache-from gcr.io/${PROJECT_ID}/${LOCAL_IMAGE_NAME}:latest
if [ -z "${IMAGE_NAME}" ]; then
  docker tag ${LOCAL_IMAGE_NAME} gcr.io/${PROJECT_ID}/${LOCAL_IMAGE_NAME}:${TAG_NAME}
  docker tag ${LOCAL_IMAGE_NAME} gcr.io/${PROJECT_ID}/${LOCAL_IMAGE_NAME}:latest
  docker push gcr.io/${PROJECT_ID}/${LOCAL_IMAGE_NAME}:${TAG_NAME}
  docker push gcr.io/${PROJECT_ID}/${LOCAL_IMAGE_NAME}:latest
else
  docker tag ${LOCAL_IMAGE_NAME} "${IMAGE_NAME}"
  docker push "${IMAGE_NAME}"
fi
@@ -1,3 +0,0 @@
# Deprecation Warning

The components in this directory have been moved to [components/contrib/dataset_manipulation](https://github.com/kubeflow/pipelines/tree/master/components/contrib/dataset_manipulation). This directory will be removed by the end of 2021.
@@ -1,90 +0,0 @@
from kfp.components import InputPath, OutputPath, create_component_from_func

def split_table_into_folds(
    table_path: InputPath('CSV'),

    train_1_path: OutputPath('CSV'),
    train_2_path: OutputPath('CSV'),
    train_3_path: OutputPath('CSV'),
    train_4_path: OutputPath('CSV'),
    train_5_path: OutputPath('CSV'),

    test_1_path: OutputPath('CSV'),
    test_2_path: OutputPath('CSV'),
    test_3_path: OutputPath('CSV'),
    test_4_path: OutputPath('CSV'),
    test_5_path: OutputPath('CSV'),

    number_of_folds: int = 5,
    random_seed: int = 0,
):
    """Splits the data table into the specified number of folds.

    The data is split into the specified number of folds k (default: 5).
    Each testing subsample has 1/k fraction of samples. The testing subsamples do not overlap.
    Each training subsample has (k-1)/k fraction of samples.
    The train_i subsample is produced by excluding test_i subsample from all samples.

    Inputs:
        table: The data to split by rows
        number_of_folds: Number of folds to split data into
        random_seed: Random seed for reproducible splitting

    Outputs:
        train_i: The i-th training subsample
        test_i: The i-th testing subsample

    Annotations:
        author: Alexey Volkov <alexey.volkov@ark-kun.com>

    """
    import pandas
    from sklearn import model_selection

    max_number_of_folds = 5

    if number_of_folds < 1 or number_of_folds > max_number_of_folds:
        raise ValueError('Number of folds must be between 1 and {}.'.format(max_number_of_folds))

    df = pandas.read_csv(
        table_path,
    )
    splitter = model_selection.KFold(
        n_splits=number_of_folds,
        shuffle=True,
        random_state=random_seed,
    )
    folds = list(splitter.split(df))

    fold_paths = [
        (train_1_path, test_1_path),
        (train_2_path, test_2_path),
        (train_3_path, test_3_path),
        (train_4_path, test_4_path),
        (train_5_path, test_5_path),
    ]

    for i in range(max_number_of_folds):
        (train_path, test_path) = fold_paths[i]
        if i < len(folds):
            (train_indices, test_indices) = folds[i]
            train_fold = df.iloc[train_indices]
            test_fold = df.iloc[test_indices]
        else:
            train_fold = df.iloc[0:0]
            test_fold = df.iloc[0:0]
        train_fold.to_csv(train_path, index=False)
        test_fold.to_csv(test_path, index=False)


if __name__ == '__main__':
    split_table_into_folds_op = create_component_from_func(
        split_table_into_folds,
        base_image='python:3.7',
        packages_to_install=['scikit-learn==0.23.1', 'pandas==1.0.5'],
        output_component_file='component.yaml',
        annotations={
            "author": "Alexey Volkov <alexey.volkov@ark-kun.com>",
            "canonical_location": "https://raw.githubusercontent.com/Ark-kun/pipeline_components/master/components/dataset_manipulation/split_data_into_folds/in_CSV/component.yaml",
        },
    )
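A quick illustration of what the KFold configuration above yields: for a 10-row table and the default 5 folds, each test fold holds 1/5 of the rows and the matching train fold the remaining 4/5:

from sklearn import model_selection

rows = [[i] for i in range(10)]  # stand-in for the CSV rows
splitter = model_selection.KFold(n_splits=5, shuffle=True, random_state=0)
for train_indices, test_indices in splitter.split(rows):
    print(len(train_indices), len(test_indices))  # prints "8 2" five times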
@@ -1,185 +0,0 @@
name: Split table into folds
description: |-
  Splits the data table into the specified number of folds.

  The data is split into the specified number of folds k (default: 5).
  Each testing subsample has 1/k fraction of samples. The testing subsamples do not overlap.
  Each training subsample has (k-1)/k fraction of samples.
  The train_i subsample is produced by excluding test_i subsample from all samples.

  Inputs:
      table: The data to split by rows
      number_of_folds: Number of folds to split data into
      random_seed: Random seed for reproducible splitting

  Outputs:
      train_i: The i-th training subsample
      test_i: The i-th testing subsample

  Annotations:
      author: Alexey Volkov <alexey.volkov@ark-kun.com>
metadata:
  annotations:
    author: Alexey Volkov <alexey.volkov@ark-kun.com>
    canonical_location: 'https://raw.githubusercontent.com/Ark-kun/pipeline_components/master/components/dataset_manipulation/split_data_into_folds/in_CSV/component.yaml'
inputs:
- {name: table, type: CSV}
- {name: number_of_folds, type: Integer, default: '5', optional: true}
- {name: random_seed, type: Integer, default: '0', optional: true}
outputs:
- {name: train_1, type: CSV}
- {name: train_2, type: CSV}
- {name: train_3, type: CSV}
- {name: train_4, type: CSV}
- {name: train_5, type: CSV}
- {name: test_1, type: CSV}
- {name: test_2, type: CSV}
- {name: test_3, type: CSV}
- {name: test_4, type: CSV}
- {name: test_5, type: CSV}
implementation:
  container:
    image: python:3.7
    command:
    - sh
    - -c
    - (PIP_DISABLE_PIP_VERSION_CHECK=1 python3 -m pip install --quiet --no-warn-script-location
      'scikit-learn==0.23.1' 'pandas==1.0.5' || PIP_DISABLE_PIP_VERSION_CHECK=1 python3
      -m pip install --quiet --no-warn-script-location 'scikit-learn==0.23.1' 'pandas==1.0.5'
      --user) && "$0" "$@"
    - python3
    - -u
    - -c
    - |
      def _make_parent_dirs_and_return_path(file_path: str):
          import os
          os.makedirs(os.path.dirname(file_path), exist_ok=True)
          return file_path

      def split_table_into_folds(
          table_path,

          train_1_path,
          train_2_path,
          train_3_path,
          train_4_path,
          train_5_path,

          test_1_path,
          test_2_path,
          test_3_path,
          test_4_path,
          test_5_path,

          number_of_folds = 5,
          random_seed = 0,
      ):
          """Splits the data table into the specified number of folds.

          The data is split into the specified number of folds k (default: 5).
          Each testing subsample has 1/k fraction of samples. The testing subsamples do not overlap.
          Each training subsample has (k-1)/k fraction of samples.
          The train_i subsample is produced by excluding test_i subsample from all samples.

          Inputs:
              table: The data to split by rows
              number_of_folds: Number of folds to split data into
              random_seed: Random seed for reproducible splitting

          Outputs:
              train_i: The i-th training subsample
              test_i: The i-th testing subsample

          Annotations:
              author: Alexey Volkov <alexey.volkov@ark-kun.com>

          """
          import pandas
          from sklearn import model_selection

          max_number_of_folds = 5

          if number_of_folds < 1 or number_of_folds > max_number_of_folds:
              raise ValueError('Number of folds must be between 1 and {}.'.format(max_number_of_folds))

          df = pandas.read_csv(
              table_path,
          )
          splitter = model_selection.KFold(
              n_splits=number_of_folds,
              shuffle=True,
              random_state=random_seed,
          )
          folds = list(splitter.split(df))

          fold_paths = [
              (train_1_path, test_1_path),
              (train_2_path, test_2_path),
              (train_3_path, test_3_path),
              (train_4_path, test_4_path),
              (train_5_path, test_5_path),
          ]

          for i in range(max_number_of_folds):
              (train_path, test_path) = fold_paths[i]
              if i < len(folds):
                  (train_indices, test_indices) = folds[i]
                  train_fold = df.iloc[train_indices]
                  test_fold = df.iloc[test_indices]
              else:
                  train_fold = df.iloc[0:0]
                  test_fold = df.iloc[0:0]
              train_fold.to_csv(train_path, index=False)
              test_fold.to_csv(test_path, index=False)

      import argparse
      _parser = argparse.ArgumentParser(prog='Split table into folds', description='Splits the data table into the specified number of folds.\n\n The data is split into the specified number of folds k (default: 5).\n Each testing subsample has 1/k fraction of samples. The testing subsamples do not overlap.\n Each training subsample has (k-1)/k fraction of samples.\n The train_i subsample is produced by excluding test_i subsample from all samples.\n\n Inputs:\n table: The data to split by rows\n number_of_folds: Number of folds to split data into\n random_seed: Random seed for reproducible splitting\n\n Outputs:\n train_i: The i-th training subsample\n test_i: The i-th testing subsample\n\n Annotations:\n author: Alexey Volkov <alexey.volkov@ark-kun.com>')
      _parser.add_argument("--table", dest="table_path", type=str, required=True, default=argparse.SUPPRESS)
      _parser.add_argument("--number-of-folds", dest="number_of_folds", type=int, required=False, default=argparse.SUPPRESS)
      _parser.add_argument("--random-seed", dest="random_seed", type=int, required=False, default=argparse.SUPPRESS)
      _parser.add_argument("--train-1", dest="train_1_path", type=_make_parent_dirs_and_return_path, required=True, default=argparse.SUPPRESS)
      _parser.add_argument("--train-2", dest="train_2_path", type=_make_parent_dirs_and_return_path, required=True, default=argparse.SUPPRESS)
      _parser.add_argument("--train-3", dest="train_3_path", type=_make_parent_dirs_and_return_path, required=True, default=argparse.SUPPRESS)
      _parser.add_argument("--train-4", dest="train_4_path", type=_make_parent_dirs_and_return_path, required=True, default=argparse.SUPPRESS)
      _parser.add_argument("--train-5", dest="train_5_path", type=_make_parent_dirs_and_return_path, required=True, default=argparse.SUPPRESS)
      _parser.add_argument("--test-1", dest="test_1_path", type=_make_parent_dirs_and_return_path, required=True, default=argparse.SUPPRESS)
      _parser.add_argument("--test-2", dest="test_2_path", type=_make_parent_dirs_and_return_path, required=True, default=argparse.SUPPRESS)
      _parser.add_argument("--test-3", dest="test_3_path", type=_make_parent_dirs_and_return_path, required=True, default=argparse.SUPPRESS)
      _parser.add_argument("--test-4", dest="test_4_path", type=_make_parent_dirs_and_return_path, required=True, default=argparse.SUPPRESS)
      _parser.add_argument("--test-5", dest="test_5_path", type=_make_parent_dirs_and_return_path, required=True, default=argparse.SUPPRESS)
      _parsed_args = vars(_parser.parse_args())

      _outputs = split_table_into_folds(**_parsed_args)
    args:
    - --table
    - {inputPath: table}
    - if:
        cond: {isPresent: number_of_folds}
        then:
        - --number-of-folds
        - {inputValue: number_of_folds}
    - if:
        cond: {isPresent: random_seed}
        then:
        - --random-seed
        - {inputValue: random_seed}
    - --train-1
    - {outputPath: train_1}
    - --train-2
    - {outputPath: train_2}
    - --train-3
    - {outputPath: train_3}
    - --train-4
    - {outputPath: train_4}
    - --train-5
    - {outputPath: train_5}
    - --test-1
    - {outputPath: test_1}
    - --test-2
    - {outputPath: test_2}
    - --test-3
    - {outputPath: test_3}
    - --test-4
    - {outputPath: test_4}
    - --test-5
    - {outputPath: test_5}
@@ -1,43 +0,0 @@
name: Chicago Taxi Trips dataset
description: |
  City of Chicago Taxi Trips dataset: https://data.cityofchicago.org/Transportation/Taxi-Trips/wrvz-psew

  The input parameters configure the SQL query to the database.
  The dataset is pretty big, so limit the number of results using the `Limit` or `Where` parameters.
  Read [Socrata dev](https://dev.socrata.com/docs/queries/) for the advanced query syntax.
metadata:
  annotations:
    author: Alexey Volkov <alexey.volkov@ark-kun.com>
    canonical_location: 'https://raw.githubusercontent.com/Ark-kun/pipeline_components/master/components/datasets/Chicago_Taxi_Trips/component.yaml'
inputs:
- {name: Where, type: String, default: 'trip_start_timestamp>="1900-01-01" AND trip_start_timestamp<"2100-01-01"'}
- {name: Limit, type: Integer, default: '1000', description: 'Number of rows to return. The rows are randomly sampled.'}
- {name: Select, type: String, default: 'trip_id,taxi_id,trip_start_timestamp,trip_end_timestamp,trip_seconds,trip_miles,pickup_census_tract,dropoff_census_tract,pickup_community_area,dropoff_community_area,fare,tips,tolls,extras,trip_total,payment_type,company,pickup_centroid_latitude,pickup_centroid_longitude,pickup_centroid_location,dropoff_centroid_latitude,dropoff_centroid_longitude,dropoff_centroid_location'}
- {name: Format, type: String, default: 'csv', description: 'Output data format. Supports csv, tsv, xml, rdf, json.'}
outputs:
- {name: Table, description: 'Result type depends on format. CSV and TSV have header.'}
implementation:
  container:
    # image: curlimages/curl # Sets a non-root user which cannot write to mounted volumes. See https://github.com/curl/curl-docker/issues/22
    image: byrnedo/alpine-curl@sha256:548379d0a4a0c08b9e55d9d87a592b7d35d9ab3037f4936f5ccd09d0b625a342
    command:
    - sh
    - -c
    - |
      set -e -x -o pipefail
      output_path="$0"
      select="$1"
      where="$2"
      limit="$3"
      format="$4"
      mkdir -p "$(dirname "$output_path")"
      curl --get 'https://data.cityofchicago.org/resource/wrvz-psew.'"${format}" \
          --data-urlencode '$limit='"${limit}" \
          --data-urlencode '$where='"${where}" \
          --data-urlencode '$select='"${select}" \
          | tr -d '"' > "$output_path"  # Removing unneeded quotes around all numbers
    - {outputPath: Table}
    - {inputValue: Select}
    - {inputValue: Where}
    - {inputValue: Limit}
    - {inputValue: Format}
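A sketch of the Socrata query the component issues, translated to Python; `requests` is an assumption (the component itself uses curl), and the parameter values mirror the component defaults:

import requests

response = requests.get(
    'https://data.cityofchicago.org/resource/wrvz-psew.csv',
    params={
        '$limit': 1000,
        '$where': 'trip_start_timestamp>="1900-01-01" AND trip_start_timestamp<"2100-01-01"',
        '$select': 'trip_id,taxi_id,trip_start_timestamp,fare,tips,trip_total',
    },
)
response.raise_for_status()
with open('chicago_taxi_trips.csv', 'w') as f:
    f.write(response.text.replace('"', ''))  # same quote-stripping as the component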
@@ -1,30 +0,0 @@
from typing import NamedTuple

from kfp.components import create_component_from_func, OutputPath


def load_dataset_using_huggingface(
    dataset_name: str,
    dataset_dict_path: OutputPath('HuggingFaceDatasetDict'),
) -> NamedTuple('Outputs', [
    ('splits', list),
]):
    from datasets import load_dataset

    dataset_dict = load_dataset(dataset_name)
    dataset_dict.save_to_disk(dataset_dict_path)
    splits = list(dataset_dict.keys())
    return (splits,)


if __name__ == '__main__':
    load_dataset_op = create_component_from_func(
        load_dataset_using_huggingface,
        base_image='python:3.9',
        packages_to_install=['datasets==1.6.2'],
        annotations={
            'author': 'Alexey Volkov <alexey.volkov@ark-kun.com>',
            "canonical_location": "https://raw.githubusercontent.com/Ark-kun/pipeline_components/master/components/datasets/HuggingFace/Load_dataset/component.yaml",
        },
        output_component_file='component.yaml',
    )
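What the function returns for a typical dataset, run locally with the same datasets==1.6.2 pin; 'imdb' matches the sample pipeline further below:

from datasets import load_dataset

dataset_dict = load_dataset('imdb')
print(list(dataset_dict.keys()))  # ['train', 'test', 'unsupervised']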
@@ -1,83 +0,0 @@
name: Load dataset using huggingface
metadata:
  annotations:
    author: Alexey Volkov <alexey.volkov@ark-kun.com>
    canonical_location: 'https://raw.githubusercontent.com/Ark-kun/pipeline_components/master/components/datasets/HuggingFace/Load_dataset/component.yaml'
inputs:
- {name: dataset_name, type: String}
outputs:
- {name: dataset_dict, type: HuggingFaceDatasetDict}
- {name: splits, type: JsonArray}
implementation:
  container:
    image: python:3.9
    command:
    - sh
    - -c
    - (PIP_DISABLE_PIP_VERSION_CHECK=1 python3 -m pip install --quiet --no-warn-script-location
      'datasets==1.6.2' || PIP_DISABLE_PIP_VERSION_CHECK=1 python3 -m pip install
      --quiet --no-warn-script-location 'datasets==1.6.2' --user) && "$0" "$@"
    - sh
    - -ec
    - |
      program_path=$(mktemp)
      printf "%s" "$0" > "$program_path"
      python3 -u "$program_path" "$@"
    - |
      def _make_parent_dirs_and_return_path(file_path: str):
          import os
          os.makedirs(os.path.dirname(file_path), exist_ok=True)
          return file_path

      def load_dataset_using_huggingface(
          dataset_name,
          dataset_dict_path,
      ):
          from datasets import load_dataset

          dataset_dict = load_dataset(dataset_name)
          dataset_dict.save_to_disk(dataset_dict_path)
          splits = list(dataset_dict.keys())
          return (splits,)

      def _serialize_json(obj) -> str:
          if isinstance(obj, str):
              return obj
          import json
          def default_serializer(obj):
              if hasattr(obj, 'to_struct'):
                  return obj.to_struct()
              else:
                  raise TypeError("Object of type '%s' is not JSON serializable and does not have .to_struct() method." % obj.__class__.__name__)
          return json.dumps(obj, default=default_serializer, sort_keys=True)

      import argparse
      _parser = argparse.ArgumentParser(prog='Load dataset using huggingface', description='')
      _parser.add_argument("--dataset-name", dest="dataset_name", type=str, required=True, default=argparse.SUPPRESS)
      _parser.add_argument("--dataset-dict", dest="dataset_dict_path", type=_make_parent_dirs_and_return_path, required=True, default=argparse.SUPPRESS)
      _parser.add_argument("----output-paths", dest="_output_paths", type=str, nargs=1)
      _parsed_args = vars(_parser.parse_args())
      _output_files = _parsed_args.pop("_output_paths", [])

      _outputs = load_dataset_using_huggingface(**_parsed_args)

      _output_serializers = [
          _serialize_json,

      ]

      import os
      for idx, output_file in enumerate(_output_files):
          try:
              os.makedirs(os.path.dirname(output_file))
          except OSError:
              pass
          with open(output_file, 'w') as f:
              f.write(_output_serializers[idx](_outputs[idx]))
    args:
    - --dataset-name
    - {inputValue: dataset_name}
    - --dataset-dict
    - {outputPath: dataset_dict}
    - '----output-paths'
    - {outputPath: splits}
@@ -1,36 +0,0 @@
from kfp.components import create_component_from_func, InputPath, OutputPath


def split_dataset_huggingface(
    dataset_dict_path: InputPath('HuggingFaceDatasetDict'),
    dataset_split_path: OutputPath('HuggingFaceDataset'),
    dataset_path: OutputPath('HuggingFaceArrowDataset'),
    # dataset_indices_path: OutputPath('HuggingFaceArrowDataset'),
    dataset_info_path: OutputPath(dict),
    dataset_state_path: OutputPath(dict),
    split_name: str = None,
):
    import os
    import shutil
    from datasets import config as datasets_config

    print(f'DatasetDict contents: {os.listdir(dataset_dict_path)}')
    shutil.copytree(os.path.join(dataset_dict_path, split_name), dataset_split_path)
    print(f'Dataset contents: {os.listdir(os.path.join(dataset_dict_path, split_name))}')
    shutil.copy(os.path.join(dataset_dict_path, split_name, datasets_config.DATASET_ARROW_FILENAME), dataset_path)
    # shutil.copy(os.path.join(dataset_dict_path, split_name, datasets_config.DATASET_INDICES_FILENAME), dataset_indices_path)
    shutil.copy(os.path.join(dataset_dict_path, split_name, datasets_config.DATASET_INFO_FILENAME), dataset_info_path)
    shutil.copy(os.path.join(dataset_dict_path, split_name, datasets_config.DATASET_STATE_JSON_FILENAME), dataset_state_path)


if __name__ == '__main__':
    split_dataset_op = create_component_from_func(
        split_dataset_huggingface,
        base_image='python:3.9',
        packages_to_install=['datasets==1.6.2'],
        annotations={
            'author': 'Alexey Volkov <alexey.volkov@ark-kun.com>',
            "canonical_location": "https://raw.githubusercontent.com/Ark-kun/pipeline_components/master/components/datasets/HuggingFace/Split_dataset/component.yaml",
        },
        output_component_file='component.yaml',
    )
@@ -1,82 +0,0 @@
name: Split dataset huggingface
metadata:
  annotations:
    author: Alexey Volkov <alexey.volkov@ark-kun.com>
    canonical_location: 'https://raw.githubusercontent.com/Ark-kun/pipeline_components/master/components/datasets/HuggingFace/Split_dataset/component.yaml'
inputs:
- {name: dataset_dict, type: HuggingFaceDatasetDict}
- {name: split_name, type: String, optional: true}
outputs:
- {name: dataset_split, type: HuggingFaceDataset}
- {name: dataset, type: HuggingFaceArrowDataset}
- {name: dataset_info, type: JsonObject}
- {name: dataset_state, type: JsonObject}
implementation:
  container:
    image: python:3.9
    command:
    - sh
    - -c
    - (PIP_DISABLE_PIP_VERSION_CHECK=1 python3 -m pip install --quiet --no-warn-script-location
      'datasets==1.6.2' || PIP_DISABLE_PIP_VERSION_CHECK=1 python3 -m pip install
      --quiet --no-warn-script-location 'datasets==1.6.2' --user) && "$0" "$@"
    - sh
    - -ec
    - |
      program_path=$(mktemp)
      printf "%s" "$0" > "$program_path"
      python3 -u "$program_path" "$@"
    - |
      def _make_parent_dirs_and_return_path(file_path: str):
          import os
          os.makedirs(os.path.dirname(file_path), exist_ok=True)
          return file_path

      def split_dataset_huggingface(
          dataset_dict_path,
          dataset_split_path,
          dataset_path,
          # dataset_indices_path: OutputPath('HuggingFaceArrowDataset'),
          dataset_info_path,
          dataset_state_path,
          split_name = None,
      ):
          import os
          import shutil
          from datasets import config as datasets_config

          print(f'DatasetDict contents: {os.listdir(dataset_dict_path)}')
          shutil.copytree(os.path.join(dataset_dict_path, split_name), dataset_split_path)
          print(f'Dataset contents: {os.listdir(os.path.join(dataset_dict_path, split_name))}')
          shutil.copy(os.path.join(dataset_dict_path, split_name, datasets_config.DATASET_ARROW_FILENAME), dataset_path)
          # shutil.copy(os.path.join(dataset_dict_path, split_name, datasets_config.DATASET_INDICES_FILENAME), dataset_indices_path)
          shutil.copy(os.path.join(dataset_dict_path, split_name, datasets_config.DATASET_INFO_FILENAME), dataset_info_path)
          shutil.copy(os.path.join(dataset_dict_path, split_name, datasets_config.DATASET_STATE_JSON_FILENAME), dataset_state_path)

      import argparse
      _parser = argparse.ArgumentParser(prog='Split dataset huggingface', description='')
      _parser.add_argument("--dataset-dict", dest="dataset_dict_path", type=str, required=True, default=argparse.SUPPRESS)
      _parser.add_argument("--split-name", dest="split_name", type=str, required=False, default=argparse.SUPPRESS)
      _parser.add_argument("--dataset-split", dest="dataset_split_path", type=_make_parent_dirs_and_return_path, required=True, default=argparse.SUPPRESS)
      _parser.add_argument("--dataset", dest="dataset_path", type=_make_parent_dirs_and_return_path, required=True, default=argparse.SUPPRESS)
      _parser.add_argument("--dataset-info", dest="dataset_info_path", type=_make_parent_dirs_and_return_path, required=True, default=argparse.SUPPRESS)
      _parser.add_argument("--dataset-state", dest="dataset_state_path", type=_make_parent_dirs_and_return_path, required=True, default=argparse.SUPPRESS)
      _parsed_args = vars(_parser.parse_args())

      _outputs = split_dataset_huggingface(**_parsed_args)
    args:
    - --dataset-dict
    - {inputPath: dataset_dict}
    - if:
        cond: {isPresent: split_name}
        then:
        - --split-name
        - {inputValue: split_name}
    - --dataset-split
    - {outputPath: dataset_split}
    - --dataset
    - {outputPath: dataset}
    - --dataset-info
    - {outputPath: dataset_info}
    - --dataset-state
    - {outputPath: dataset_state}
@@ -1,24 +0,0 @@
from kfp import components
from kfp import dsl


load_dataset_op = components.load_component_from_url('https://raw.githubusercontent.com/kubeflow/pipelines/d0e14a1dad4b851ad2a60a0c1a8201493f3d931c/components/datasets/HuggingFace/Load_dataset/component.yaml')
split_dataset_op = components.load_component_from_url('https://raw.githubusercontent.com/kubeflow/pipelines/d0e14a1dad4b851ad2a60a0c1a8201493f3d931c/components/datasets/HuggingFace/Split_dataset/component.yaml')


def huggingface_pipeline():
    dataset_dict_task = load_dataset_op(dataset_name='imdb')
    with dsl.ParallelFor(dataset_dict_task.outputs['splits']) as split_name:
        dataset_task = split_dataset_op(
            dataset_dict=dataset_dict_task.outputs['dataset_dict'],
            split_name=split_name,
        )


if __name__ == '__main__':
    import kfp
    kfp_endpoint = None
    kfp.Client(host=kfp_endpoint).create_run_from_pipeline_func(
        huggingface_pipeline,
        arguments={}
    )
@@ -1,3 +0,0 @@
# Deprecation Warning

The components in this directory have been moved to [components/contrib/datasets](https://github.com/kubeflow/pipelines/tree/master/components/contrib/datasets). This directory will be removed by the end of 2021.
@@ -1,32 +0,0 @@
name: Predict using TF on Dataflow
description: |
  Runs TensorFlow prediction on Google Cloud Dataflow
  Input and output data is in GCS
inputs:
- {name: Data file pattern, type: GCSPath, description: 'GCS or local path of test file patterns.'} # type: {GCSPath: {data_type: CSV}}
- {name: Schema, type: GCSPath, description: 'GCS json schema file path.'} # type: {GCSPath: {data_type: TFDV schema JSON}}
- {name: Target column, type: String, description: 'Name of the column for prediction target.'}
- {name: Model, type: GCSPath, description: 'GCS or local path of model trained with tft preprocessed data.'} # Models trained with estimator are exported to base/export/export/123456781 directory. # Our trainer export only one model. #TODO: Output single model from trainer # type: {GCSPath: {path_type: Directory, data_type: Exported TensorFlow models dir}}
- {name: Batch size, type: Integer, default: '32', description: 'Batch size used in prediction.'}
- {name: Run mode, type: String, default: local, description: 'Whether to run the job locally or in Cloud Dataflow. Valid values are "local" and "cloud".'}
- {name: GCP project, type: GCPProjectID, description: 'The GCP project to run the dataflow job.'}
- {name: Predictions dir, type: GCSPath, description: 'GCS or local directory.'} # Will contain prediction_results-* and schema.json files; TODO: Split outputs and replace dir with single file # type: {GCSPath: {path_type: Directory}}
outputs:
- {name: Predictions dir, type: GCSPath, description: 'GCS or local directory.'} # Will contain prediction_results-* and schema.json files; TODO: Split outputs and replace dir with single file # type: {GCSPath: {path_type: Directory}}
- {name: MLPipeline UI metadata, type: UI metadata}
implementation:
  container:
    image: gcr.io/ml-pipeline/ml-pipeline-dataflow-tf-predict:57d9f7f1cfd458e945d297957621716062d89a49
    command: [python2, /ml/predict.py]
    args: [
      --data, {inputValue: Data file pattern},
      --schema, {inputValue: Schema},
      --target, {inputValue: Target column},
      --model, {inputValue: Model},
      --mode, {inputValue: Run mode},
      --project, {inputValue: GCP project},
      --batchsize, {inputValue: Batch size},
      --output, {inputValue: Predictions dir},
      --prediction-results-uri-pattern-output-path, {outputPath: Predictions dir},
      --ui-metadata-output-path, {outputPath: MLPipeline UI metadata},
    ]
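Deprecated components like this one were typically consumed by URL with the kfp v1 SDK; a sketch, where the repository path is an assumption based on this directory layout:

from kfp import components

dataflow_tf_predict_op = components.load_component_from_url(
    'https://raw.githubusercontent.com/kubeflow/pipelines/'
    'd0e14a1dad4b851ad2a60a0c1a8201493f3d931c/components/dataflow/predict/component.yaml')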
@@ -1,34 +0,0 @@
name: TFX - Data Validation
description: |
  Runs Tensorflow Data Validation. https://www.tensorflow.org/tfx/data_validation/get_started
  Tensorflow Data Validation (TFDV) can analyze training and serving data to:
  * compute descriptive statistics,
  * infer a schema,
  * detect data anomalies.
inputs:
- {name: Inference data, type: GCSPath, description: GCS path of the CSV file from which to infer the schema.} # type: {GCSPath: {data_type: CSV}}
- {name: Validation data, type: GCSPath, description: GCS path of the CSV file whose contents should be validated.} # type: {GCSPath: {data_type: CSV}}
- {name: Column names, type: GCSPath, description: GCS json file containing a list of column names.} # type: {GCSPath: {data_type: JSON}}
- {name: Key columns, type: String, description: Comma separated list of columns to treat as keys.}
- {name: GCP project, type: GCPProjectID, default: '', description: The GCP project to run the dataflow job.}
- {name: Run mode, type: String, default: local, description: Whether to run the job locally or in Cloud Dataflow. Valid values are "local" and "cloud".}
- {name: Validation output, type: GCSPath, description: GCS or local directory.} # type: {GCSPath: {path_type: Directory}}
outputs:
- {name: Schema, type: GCSPath, description: GCS path of the inferred schema JSON.} # type: {GCSPath: {data_type: TFDV schema JSON}}
- {name: Validation result, type: String, description: Indicates whether anomalies were detected or not.}
implementation:
  container:
    image: gcr.io/ml-pipeline/ml-pipeline-dataflow-tfdv:57d9f7f1cfd458e945d297957621716062d89a49
    command: [python2, /ml/validate.py]
    args: [
      --csv-data-for-inference, {inputValue: Inference data},
      --csv-data-to-validate, {inputValue: Validation data},
      --column-names, {inputValue: Column names},
      --key-columns, {inputValue: Key columns},
      --project, {inputValue: GCP project},
      --mode, {inputValue: Run mode},
      --output, {inputValue: Validation output},
    ]
    fileOutputs:
      Schema: /schema.txt
      Validation result: /output_validation_result.txt
@@ -1,33 +0,0 @@
name: TFX - Analyze model
description: |
  Runs Tensorflow Model Analysis. https://www.tensorflow.org/tfx/model_analysis/get_started
  TensorFlow Model Analysis allows you to perform model evaluations in the TFX pipeline, and view resultant metrics and plots in a Jupyter notebook. Specifically, it can provide:
  * metrics computed on entire training and holdout dataset, as well as next-day evaluations
  * tracking metrics over time
  * model quality performance on different feature slices
inputs:
- {name: Model, type: GCSPath, description: GCS path to the model which will be evaluated.} # type: {GCSPath: {path_type: Directory, data_type: Exported TensorFlow models dir}}
- {name: Evaluation data, type: GCSPath, description: GCS path of eval files.} # type: {GCSPath: {data_type: CSV}}
- {name: Schema, type: GCSPath, description: GCS json schema file path.} # type: {GCSPath: {data_type: TFDV schema JSON}}
- {name: Run mode, type: String, default: local, description: whether to run the job locally or in Cloud Dataflow.}
- {name: GCP project, type: GCPProjectID, default: '', description: 'The GCP project to run the dataflow job, if running in the `cloud` mode.'}
- {name: Slice columns, type: String, description: Comma-separated list of columns on which to slice for analysis.}
- {name: Analysis results dir, type: GCSPath, description: GCS or local directory where the analysis results should be written.} # type: {GCSPath: {path_type: Directory}}
outputs:
- {name: Analysis results dir, type: GCSPath, description: GCS or local directory where the analysis results were written.} # type: {GCSPath: {path_type: Directory}}
- {name: MLPipeline UI metadata, type: UI metadata}
implementation:
  container:
    image: gcr.io/ml-pipeline/ml-pipeline-dataflow-tfma:57d9f7f1cfd458e945d297957621716062d89a49
    command: [python2, /ml/model_analysis.py]
    args: [
      --model, {inputValue: Model},
      --eval, {inputValue: Evaluation data},
      --schema, {inputValue: Schema},
      --mode, {inputValue: Run mode},
      --project, {inputValue: GCP project},
      --slice-columns, {inputValue: Slice columns},
      --output, {inputValue: Analysis results dir},
      --output-dir-uri-output-path, {outputPath: Analysis results dir},
      --ui-metadata-output-path, {outputPath: MLPipeline UI metadata},
    ]
@@ -1,26 +0,0 @@
name: Transform using TF on Dataflow
description: Runs TensorFlow Transform on Google Cloud Dataflow
inputs:
- {name: Training data file pattern, type: GCSPath, description: 'GCS path of train file patterns.'} # Also supports local CSV # type: {GCSPath: {data_type: CSV}}
- {name: Evaluation data file pattern, type: GCSPath, description: 'GCS path of eval file patterns.'} # Also supports local CSV # type: {GCSPath: {data_type: CSV}}
- {name: Schema, type: GCSPath, description: 'GCS json schema file path.'} # type: {GCSPath: {data_type: JSON}}
- {name: GCP project, type: GCPProjectID, description: 'The GCP project to run the dataflow job.'}
- {name: Run mode, type: String, default: local, description: 'Whether to run the job locally or in Cloud Dataflow. Valid values are "local" and "cloud".'}
- {name: Preprocessing module, type: GCSPath, default: '', description: 'GCS path to a python file defining "preprocess" and "get_feature_columns" functions.'} # type: {GCSPath: {data_type: Python}}
- {name: Transformed data dir, type: GCSPath, description: 'GCS or local directory'} # Also supports local paths # type: {GCSPath: {path_type: Directory}}
outputs:
- {name: Transformed data dir, type: GCSPath} # type: {GCSPath: {path_type: Directory}}
implementation:
  container:
    image: gcr.io/ml-pipeline/ml-pipeline-dataflow-tft:57d9f7f1cfd458e945d297957621716062d89a49
    command: [python2, /ml/transform.py]
    args: [
      --train, {inputValue: Training data file pattern},
      --eval, {inputValue: Evaluation data file pattern},
      --schema, {inputValue: Schema},
      --project, {inputValue: GCP project},
      --mode, {inputValue: Run mode},
      --preprocessing-module, {inputValue: Preprocessing module},
      --output, {inputValue: Transformed data dir},
      ---output-dir-uri-output-path, {outputPath: Transformed data dir},
    ]
@ -1,85 +0,0 @@
|
|||
from kfp.components import InputPath, OutputPath
|
||||
|
||||
def Evaluator(
|
||||
examples_path: InputPath('Examples'),
|
||||
evaluation_path: OutputPath('ModelEvaluation'),
|
||||
blessing_path: OutputPath('ModelBlessing'),
|
||||
model_path: InputPath('Model') = None,
|
||||
baseline_model_path: InputPath('Model') = None,
|
||||
schema_path: InputPath('Schema') = None,
|
||||
eval_config: {'JsonObject': {'data_type': 'proto:tensorflow_model_analysis.EvalConfig'}} = None,
|
||||
feature_slicing_spec: {'JsonObject': {'data_type': 'proto:tfx.components.evaluator.FeatureSlicingSpec'}} = None,
|
||||
fairness_indicator_thresholds: list = None,
|
||||
example_splits: str = None,
|
||||
module_file: str = None,
|
||||
module_path: str = None,
|
||||
):
|
||||
from tfx.components.evaluator.component import Evaluator as component_class
|
||||
|
||||
#Generated code
|
||||
import os
|
||||
import tempfile
|
||||
from tensorflow.io import gfile
|
||||
from google.protobuf import json_format, message
|
||||
from tfx.types import channel_utils, artifact_utils
|
||||
from tfx.components.base import base_executor
|
||||
|
||||
arguments = locals().copy()
|
||||
|
||||
component_class_args = {}
|
||||
|
||||
for name, execution_parameter in component_class.SPEC_CLASS.PARAMETERS.items():
|
||||
argument_value = arguments.get(name, None)
|
||||
if argument_value is None:
|
||||
continue
|
||||
parameter_type = execution_parameter.type
|
||||
if isinstance(parameter_type, type) and issubclass(parameter_type, message.Message):
|
||||
argument_value_obj = parameter_type()
|
||||
json_format.Parse(argument_value, argument_value_obj)
|
||||
else:
|
||||
argument_value_obj = argument_value
|
||||
component_class_args[name] = argument_value_obj
|
||||
|
||||
for name, channel_parameter in component_class.SPEC_CLASS.INPUTS.items():
|
||||
artifact_path = arguments.get(name + '_uri') or arguments.get(name + '_path')
|
||||
if artifact_path:
|
||||
artifact = channel_parameter.type()
|
||||
artifact.uri = artifact_path.rstrip('/') + '/' # Some TFX components require that the artifact URIs end with a slash
|
||||
if channel_parameter.type.PROPERTIES and 'split_names' in channel_parameter.type.PROPERTIES:
|
||||
# Recovering splits
|
||||
subdirs = gfile.listdir(artifact_path)
|
||||
# Workaround for https://github.com/tensorflow/tensorflow/issues/39167
|
||||
subdirs = [subdir.rstrip('/') for subdir in subdirs]
|
||||
split_names = [subdir.replace('Split-', '') for subdir in subdirs]
|
||||
artifact.split_names = artifact_utils.encode_split_names(sorted(split_names))
|
||||
component_class_args[name] = channel_utils.as_channel([artifact])
|
||||

    component_class_instance = component_class(**component_class_args)

    input_dict = channel_utils.unwrap_channel_dict(component_class_instance.inputs.get_all())
    output_dict = {}
    exec_properties = component_class_instance.exec_properties

    # Generating paths for output artifacts
    for name, channel in component_class_instance.outputs.items():
        artifact_path = arguments.get('output_' + name + '_uri') or arguments.get(name + '_path')
        if artifact_path:
            artifact = channel.type()
            artifact.uri = artifact_path.rstrip('/') + '/'  # Some TFX components require that the artifact URIs end with a slash
            artifact_list = [artifact]
            channel._artifacts = artifact_list
            output_dict[name] = artifact_list

    print('component instance: ' + str(component_class_instance))

    executor_context = base_executor.BaseExecutor.Context(
        beam_pipeline_args=arguments.get('beam_pipeline_args'),
        tmp_dir=tempfile.gettempdir(),
        unique_id='tfx_component',
    )
    executor = component_class_instance.executor_spec.executor_class(executor_context)
    executor.Do(
        input_dict=input_dict,
        output_dict=output_dict,
        exec_properties=exec_properties,
    )
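This wrapper was packaged into the component.yaml shown next. A hedged sketch of how such packaging is typically done in this repository with create_component_from_func; the exact build call is not part of this diff, so the settings here are assumptions apart from the image tag, which matches the YAML below:

from kfp.components import create_component_from_func

if __name__ == '__main__':
    evaluator_op = create_component_from_func(
        Evaluator,
        base_image='tensorflow/tfx:0.29.0',  # same image as in the component.yaml below
        output_component_file='component.yaml',
    )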
@@ -1,195 +0,0 @@
name: Evaluator
inputs:
- {name: examples, type: Examples}
- {name: model, type: Model, optional: true}
- {name: baseline_model, type: Model, optional: true}
- {name: schema, type: Schema, optional: true}
- name: eval_config
  type:
    JsonObject: {data_type: 'proto:tensorflow_model_analysis.EvalConfig'}
  optional: true
- name: feature_slicing_spec
  type:
    JsonObject: {data_type: 'proto:tfx.components.evaluator.FeatureSlicingSpec'}
  optional: true
- {name: fairness_indicator_thresholds, type: JsonArray, optional: true}
- {name: example_splits, type: String, optional: true}
- {name: module_file, type: String, optional: true}
- {name: module_path, type: String, optional: true}
outputs:
- {name: evaluation, type: ModelEvaluation}
- {name: blessing, type: ModelBlessing}
metadata:
  annotations:
    author: Alexey Volkov <alexey.volkov@ark-kun.com>
    canonical_location: 'https://raw.githubusercontent.com/Ark-kun/pipeline_components/master/components/deprecated/tfx/Evaluator/component.yaml'
implementation:
  container:
    image: tensorflow/tfx:0.29.0
    command:
    - sh
    - -ec
    - |
      program_path=$(mktemp)
      printf "%s" "$0" > "$program_path"
      python3 -u "$program_path" "$@"
    - |
      def _make_parent_dirs_and_return_path(file_path: str):
          import os
          os.makedirs(os.path.dirname(file_path), exist_ok=True)
          return file_path

      def Evaluator(
          examples_path,
          evaluation_path,
          blessing_path,
          model_path = None,
          baseline_model_path = None,
          schema_path = None,
          eval_config = None,
          feature_slicing_spec = None,
          fairness_indicator_thresholds = None,
          example_splits = None,
          module_file = None,
          module_path = None,
      ):
          from tfx.components.evaluator.component import Evaluator as component_class

          # Generated code
          import os
          import tempfile
          from tensorflow.io import gfile
          from google.protobuf import json_format, message
          from tfx.types import channel_utils, artifact_utils
          from tfx.components.base import base_executor

          arguments = locals().copy()

          component_class_args = {}

          for name, execution_parameter in component_class.SPEC_CLASS.PARAMETERS.items():
              argument_value = arguments.get(name, None)
              if argument_value is None:
                  continue
              parameter_type = execution_parameter.type
              if isinstance(parameter_type, type) and issubclass(parameter_type, message.Message):
                  argument_value_obj = parameter_type()
                  json_format.Parse(argument_value, argument_value_obj)
              else:
                  argument_value_obj = argument_value
              component_class_args[name] = argument_value_obj

          for name, channel_parameter in component_class.SPEC_CLASS.INPUTS.items():
              artifact_path = arguments.get(name + '_uri') or arguments.get(name + '_path')
              if artifact_path:
                  artifact = channel_parameter.type()
                  artifact.uri = artifact_path.rstrip('/') + '/'  # Some TFX components require that the artifact URIs end with a slash
                  if channel_parameter.type.PROPERTIES and 'split_names' in channel_parameter.type.PROPERTIES:
                      # Recovering splits
                      subdirs = gfile.listdir(artifact_path)
                      # Workaround for https://github.com/tensorflow/tensorflow/issues/39167
                      subdirs = [subdir.rstrip('/') for subdir in subdirs]
                      split_names = [subdir.replace('Split-', '') for subdir in subdirs]
                      artifact.split_names = artifact_utils.encode_split_names(sorted(split_names))
                  component_class_args[name] = channel_utils.as_channel([artifact])

          component_class_instance = component_class(**component_class_args)

          input_dict = channel_utils.unwrap_channel_dict(component_class_instance.inputs.get_all())
          output_dict = {}
          exec_properties = component_class_instance.exec_properties

          # Generating paths for output artifacts
          for name, channel in component_class_instance.outputs.items():
              artifact_path = arguments.get('output_' + name + '_uri') or arguments.get(name + '_path')
              if artifact_path:
                  artifact = channel.type()
                  artifact.uri = artifact_path.rstrip('/') + '/'  # Some TFX components require that the artifact URIs end with a slash
                  artifact_list = [artifact]
                  channel._artifacts = artifact_list
                  output_dict[name] = artifact_list

          print('component instance: ' + str(component_class_instance))

          executor_context = base_executor.BaseExecutor.Context(
              beam_pipeline_args=arguments.get('beam_pipeline_args'),
              tmp_dir=tempfile.gettempdir(),
              unique_id='tfx_component',
          )
          executor = component_class_instance.executor_spec.executor_class(executor_context)
          executor.Do(
              input_dict=input_dict,
              output_dict=output_dict,
              exec_properties=exec_properties,
          )

      import json
      import argparse
      _parser = argparse.ArgumentParser(prog='Evaluator', description='')
      _parser.add_argument("--examples", dest="examples_path", type=str, required=True, default=argparse.SUPPRESS)
      _parser.add_argument("--model", dest="model_path", type=str, required=False, default=argparse.SUPPRESS)
      _parser.add_argument("--baseline-model", dest="baseline_model_path", type=str, required=False, default=argparse.SUPPRESS)
      _parser.add_argument("--schema", dest="schema_path", type=str, required=False, default=argparse.SUPPRESS)
      _parser.add_argument("--eval-config", dest="eval_config", type=str, required=False, default=argparse.SUPPRESS)
      _parser.add_argument("--feature-slicing-spec", dest="feature_slicing_spec", type=str, required=False, default=argparse.SUPPRESS)
      _parser.add_argument("--fairness-indicator-thresholds", dest="fairness_indicator_thresholds", type=json.loads, required=False, default=argparse.SUPPRESS)
      _parser.add_argument("--example-splits", dest="example_splits", type=str, required=False, default=argparse.SUPPRESS)
      _parser.add_argument("--module-file", dest="module_file", type=str, required=False, default=argparse.SUPPRESS)
      _parser.add_argument("--module-path", dest="module_path", type=str, required=False, default=argparse.SUPPRESS)
      _parser.add_argument("--evaluation", dest="evaluation_path", type=_make_parent_dirs_and_return_path, required=True, default=argparse.SUPPRESS)
      _parser.add_argument("--blessing", dest="blessing_path", type=_make_parent_dirs_and_return_path, required=True, default=argparse.SUPPRESS)
      _parsed_args = vars(_parser.parse_args())

      _outputs = Evaluator(**_parsed_args)
    args:
    - --examples
    - {inputPath: examples}
    - if:
        cond: {isPresent: model}
        then:
        - --model
        - {inputPath: model}
    - if:
        cond: {isPresent: baseline_model}
        then:
        - --baseline-model
        - {inputPath: baseline_model}
    - if:
        cond: {isPresent: schema}
        then:
        - --schema
        - {inputPath: schema}
    - if:
        cond: {isPresent: eval_config}
        then:
        - --eval-config
        - {inputValue: eval_config}
    - if:
        cond: {isPresent: feature_slicing_spec}
        then:
        - --feature-slicing-spec
        - {inputValue: feature_slicing_spec}
    - if:
        cond: {isPresent: fairness_indicator_thresholds}
        then:
        - --fairness-indicator-thresholds
        - {inputValue: fairness_indicator_thresholds}
    - if:
        cond: {isPresent: example_splits}
        then:
        - --example-splits
        - {inputValue: example_splits}
    - if:
        cond: {isPresent: module_file}
        then:
        - --module-file
        - {inputValue: module_file}
    - if:
        cond: {isPresent: module_path}
        then:
        - --module-path
        - {inputValue: module_path}
    - --evaluation
    - {outputPath: evaluation}
    - --blessing
    - {outputPath: blessing}
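For completeness, a hedged sketch of consuming this Evaluator definition with the KFP v1 SDK; the upstream task and its output name are hypothetical:

from kfp import components

# Load the relocated component definition from a local copy.
evaluator_op = components.load_component_from_file('component.yaml')

# Inside a pipeline, artifact inputs come from upstream task outputs, e.g.:
#   evaluator_task = evaluator_op(
#       examples=example_gen_task.outputs['examples'],  # hypothetical upstream task
#       eval_config='{"model_specs": [{"label_key": "label"}]}',
#   )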
Some files were not shown because too many files have changed in this diff.