chore(components): clean up component directory (#7183)

* move components to contrib

* move components to contrib
IronPan 2022-01-21 11:33:11 -08:00 committed by GitHub
parent 4d9e9bedd3
commit 694d3a3764
218 changed files with 0 additions and 14601 deletions

@@ -1,62 +0,0 @@
from kfp.components import InputPath, OutputPath, create_component_from_func
def catboost_predict_class_probabilities(
data_path: InputPath('CSV'),
model_path: InputPath('CatBoostModel'),
predictions_path: OutputPath(),
label_column: int = None,
):
'''Predict class probabilities with a CatBoost model.
Args:
data_path: Path for the data in CSV format.
model_path: Path for the trained model in binary CatBoostModel format.
label_column: Column containing the label data.
predictions_path: Output path for the predictions.
Outputs:
predictions: Predictions in text format.
Annotations:
author: Alexey Volkov <alexey.volkov@ark-kun.com>
'''
import tempfile
from catboost import CatBoost, Pool
import numpy
if label_column:
column_descriptions = {label_column: 'Label'}
column_description_path = tempfile.NamedTemporaryFile(delete=False).name
with open(column_description_path, 'w') as column_description_file:
for idx, kind in column_descriptions.items():
column_description_file.write('{}\t{}\n'.format(idx, kind))
else:
column_description_path = None
eval_data = Pool(
data_path,
column_description=column_description_path,
has_header=True,
delimiter=',',
)
model = CatBoost()
model.load_model(model_path)
predictions = model.predict(eval_data, prediction_type='Probability')
numpy.savetxt(predictions_path, predictions)
if __name__ == '__main__':
catboost_predict_class_probabilities_op = create_component_from_func(
catboost_predict_class_probabilities,
output_component_file='component.yaml',
base_image='python:3.7',
packages_to_install=['catboost==0.23'],
annotations={
"author": "Alexey Volkov <alexey.volkov@ark-kun.com>",
"canonical_location": "https://raw.githubusercontent.com/Ark-kun/pipeline_components/master/components/CatBoost/Predict_class_probabilities/from_CSV/component.yaml",
},
)

@@ -1,112 +0,0 @@
name: Catboost predict class probabilities
description: |-
Predict class probabilities with a CatBoost model.
Args:
data_path: Path for the data in CSV format.
model_path: Path for the trained model in binary CatBoostModel format.
label_column: Column containing the label data.
predictions_path: Output path for the predictions.
Outputs:
predictions: Predictions in text format.
Annotations:
author: Alexey Volkov <alexey.volkov@ark-kun.com>
inputs:
- {name: data, type: CSV}
- {name: model, type: CatBoostModel}
- {name: label_column, type: Integer, optional: true}
outputs:
- {name: predictions}
metadata:
annotations:
author: Alexey Volkov <alexey.volkov@ark-kun.com>
canonical_location: 'https://raw.githubusercontent.com/Ark-kun/pipeline_components/master/components/CatBoost/Predict_class_probabilities/from_CSV/component.yaml'
implementation:
container:
image: python:3.7
command:
- sh
- -c
- (PIP_DISABLE_PIP_VERSION_CHECK=1 python3 -m pip install --quiet --no-warn-script-location
'catboost==0.23' || PIP_DISABLE_PIP_VERSION_CHECK=1 python3 -m pip install --quiet
--no-warn-script-location 'catboost==0.23' --user) && "$0" "$@"
- python3
- -u
- -c
- |
def _make_parent_dirs_and_return_path(file_path: str):
import os
os.makedirs(os.path.dirname(file_path), exist_ok=True)
return file_path
def catboost_predict_class_probabilities(
data_path,
model_path,
predictions_path,
label_column = None,
):
'''Predict class probabilities with a CatBoost model.
Args:
data_path: Path for the data in CSV format.
model_path: Path for the trained model in binary CatBoostModel format.
label_column: Column containing the label data.
predictions_path: Output path for the predictions.
Outputs:
predictions: Predictions in text format.
Annotations:
author: Alexey Volkov <alexey.volkov@ark-kun.com>
'''
import tempfile
from catboost import CatBoost, Pool
import numpy
if label_column:
column_descriptions = {label_column: 'Label'}
column_description_path = tempfile.NamedTemporaryFile(delete=False).name
with open(column_description_path, 'w') as column_description_file:
for idx, kind in column_descriptions.items():
column_description_file.write('{}\t{}\n'.format(idx, kind))
else:
column_description_path = None
eval_data = Pool(
data_path,
column_description=column_description_path,
has_header=True,
delimiter=',',
)
model = CatBoost()
model.load_model(model_path)
predictions = model.predict(eval_data, prediction_type='Probability')
numpy.savetxt(predictions_path, predictions)
import argparse
_parser = argparse.ArgumentParser(prog='Catboost predict class probabilities', description='Predict class probabilities with a CatBoost model.\n\n Args:\n data_path: Path for the data in CSV format.\n model_path: Path for the trained model in binary CatBoostModel format.\n label_column: Column containing the label data.\n predictions_path: Output path for the predictions.\n\n Outputs:\n predictions: Predictions in text format.\n\n Annotations:\n author: Alexey Volkov <alexey.volkov@ark-kun.com>')
_parser.add_argument("--data", dest="data_path", type=str, required=True, default=argparse.SUPPRESS)
_parser.add_argument("--model", dest="model_path", type=str, required=True, default=argparse.SUPPRESS)
_parser.add_argument("--label-column", dest="label_column", type=int, required=False, default=argparse.SUPPRESS)
_parser.add_argument("--predictions", dest="predictions_path", type=_make_parent_dirs_and_return_path, required=True, default=argparse.SUPPRESS)
_parsed_args = vars(_parser.parse_args())
_outputs = catboost_predict_class_probabilities(**_parsed_args)
args:
- --data
- {inputPath: data}
- --model
- {inputPath: model}
- if:
cond: {isPresent: label_column}
then:
- --label-column
- {inputValue: label_column}
- --predictions
- {outputPath: predictions}

@@ -1,62 +0,0 @@
from kfp.components import InputPath, OutputPath, create_component_from_func
def catboost_predict_classes(
data_path: InputPath('CSV'),
model_path: InputPath('CatBoostModel'),
predictions_path: OutputPath(),
label_column: int = None,
):
'''Predict classes using the CatBoost classifier model.
Args:
data_path: Path for the data in CSV format.
model_path: Path for the trained model in binary CatBoostModel format.
label_column: Column containing the label data.
predictions_path: Output path for the predictions.
Outputs:
predictions: Class predictions in text format.
Annotations:
author: Alexey Volkov <alexey.volkov@ark-kun.com>
'''
import tempfile
from catboost import CatBoostClassifier, Pool
import numpy
if label_column:
column_descriptions = {label_column: 'Label'}
column_description_path = tempfile.NamedTemporaryFile(delete=False).name
with open(column_description_path, 'w') as column_description_file:
for idx, kind in column_descriptions.items():
column_description_file.write('{}\t{}\n'.format(idx, kind))
else:
column_description_path = None
eval_data = Pool(
data_path,
column_description=column_description_path,
has_header=True,
delimiter=',',
)
model = CatBoostClassifier()
model.load_model(model_path)
predictions = model.predict(eval_data)
numpy.savetxt(predictions_path, predictions, fmt='%s')
if __name__ == '__main__':
catboost_predict_classes_op = create_component_from_func(
catboost_predict_classes,
output_component_file='component.yaml',
base_image='python:3.7',
packages_to_install=['catboost==0.22'],
annotations={
"author": "Alexey Volkov <alexey.volkov@ark-kun.com>",
"canonical_location": "https://raw.githubusercontent.com/Ark-kun/pipeline_components/master/components/CatBoost/Predict_classes/from_CSV/component.yaml",
},
)

@@ -1,112 +0,0 @@
name: Catboost predict classes
description: |-
Predict classes using the CatBoost classifier model.
Args:
data_path: Path for the data in CSV format.
model_path: Path for the trained model in binary CatBoostModel format.
label_column: Column containing the label data.
predictions_path: Output path for the predictions.
Outputs:
predictions: Class predictions in text format.
Annotations:
author: Alexey Volkov <alexey.volkov@ark-kun.com>
inputs:
- {name: data, type: CSV}
- {name: model, type: CatBoostModel}
- {name: label_column, type: Integer, optional: true}
outputs:
- {name: predictions}
metadata:
annotations:
author: Alexey Volkov <alexey.volkov@ark-kun.com>
canonical_location: 'https://raw.githubusercontent.com/Ark-kun/pipeline_components/master/components/CatBoost/Predict_classes/from_CSV/component.yaml'
implementation:
container:
image: python:3.7
command:
- sh
- -c
- (PIP_DISABLE_PIP_VERSION_CHECK=1 python3 -m pip install --quiet --no-warn-script-location
'catboost==0.22' || PIP_DISABLE_PIP_VERSION_CHECK=1 python3 -m pip install --quiet
--no-warn-script-location 'catboost==0.22' --user) && "$0" "$@"
- python3
- -u
- -c
- |
def _make_parent_dirs_and_return_path(file_path: str):
import os
os.makedirs(os.path.dirname(file_path), exist_ok=True)
return file_path
def catboost_predict_classes(
data_path,
model_path,
predictions_path,
label_column = None,
):
'''Predict classes using the CatBoost classifier model.
Args:
data_path: Path for the data in CSV format.
model_path: Path for the trained model in binary CatBoostModel format.
label_column: Column containing the label data.
predictions_path: Output path for the predictions.
Outputs:
predictions: Class predictions in text format.
Annotations:
author: Alexey Volkov <alexey.volkov@ark-kun.com>
'''
import tempfile
from catboost import CatBoostClassifier, Pool
import numpy
if label_column:
column_descriptions = {label_column: 'Label'}
column_description_path = tempfile.NamedTemporaryFile(delete=False).name
with open(column_description_path, 'w') as column_description_file:
for idx, kind in column_descriptions.items():
column_description_file.write('{}\t{}\n'.format(idx, kind))
else:
column_description_path = None
eval_data = Pool(
data_path,
column_description=column_description_path,
has_header=True,
delimiter=',',
)
model = CatBoostClassifier()
model.load_model(model_path)
predictions = model.predict(eval_data)
numpy.savetxt(predictions_path, predictions, fmt='%s')
import argparse
_parser = argparse.ArgumentParser(prog='Catboost predict classes', description='Predict classes using the CatBoost classifier model.\n\n Args:\n data_path: Path for the data in CSV format.\n model_path: Path for the trained model in binary CatBoostModel format.\n label_column: Column containing the label data.\n predictions_path: Output path for the predictions.\n\n Outputs:\n predictions: Class predictions in text format.\n\n Annotations:\n author: Alexey Volkov <alexey.volkov@ark-kun.com>')
_parser.add_argument("--data", dest="data_path", type=str, required=True, default=argparse.SUPPRESS)
_parser.add_argument("--model", dest="model_path", type=str, required=True, default=argparse.SUPPRESS)
_parser.add_argument("--label-column", dest="label_column", type=int, required=False, default=argparse.SUPPRESS)
_parser.add_argument("--predictions", dest="predictions_path", type=_make_parent_dirs_and_return_path, required=True, default=argparse.SUPPRESS)
_parsed_args = vars(_parser.parse_args())
_outputs = catboost_predict_classes(**_parsed_args)
args:
- --data
- {inputPath: data}
- --model
- {inputPath: model}
- if:
cond: {isPresent: label_column}
then:
- --label-column
- {inputValue: label_column}
- --predictions
- {outputPath: predictions}

@@ -1,62 +0,0 @@
from kfp.components import InputPath, OutputPath, create_component_from_func
def catboost_predict_values(
data_path: InputPath('CSV'),
model_path: InputPath('CatBoostModel'),
predictions_path: OutputPath(),
label_column: int = None,
):
'''Predict values with a CatBoost model.
Args:
data_path: Path for the data in CSV format.
model_path: Path for the trained model in binary CatBoostModel format.
label_column: Column containing the label data.
predictions_path: Output path for the predictions.
Outputs:
predictions: Predictions in text format.
Annotations:
author: Alexey Volkov <alexey.volkov@ark-kun.com>
'''
import tempfile
from catboost import CatBoost, Pool
import numpy
if label_column:
column_descriptions = {label_column: 'Label'}
column_description_path = tempfile.NamedTemporaryFile(delete=False).name
with open(column_description_path, 'w') as column_description_file:
for idx, kind in column_descriptions.items():
column_description_file.write('{}\t{}\n'.format(idx, kind))
else:
column_description_path = None
eval_data = Pool(
data_path,
column_description=column_description_path,
has_header=True,
delimiter=',',
)
model = CatBoost()
model.load_model(model_path)
predictions = model.predict(eval_data, prediction_type='RawFormulaVal')
numpy.savetxt(predictions_path, predictions)
if __name__ == '__main__':
catboost_predict_values_op = create_component_from_func(
catboost_predict_values,
output_component_file='component.yaml',
base_image='python:3.7',
packages_to_install=['catboost==0.23'],
annotations={
"author": "Alexey Volkov <alexey.volkov@ark-kun.com>",
"canonical_location": "https://raw.githubusercontent.com/Ark-kun/pipeline_components/master/components/CatBoost/Predict_values/from_CSV/component.yaml",
},
)

@@ -1,112 +0,0 @@
name: Catboost predict values
description: |-
Predict values with a CatBoost model.
Args:
data_path: Path for the data in CSV format.
model_path: Path for the trained model in binary CatBoostModel format.
label_column: Column containing the label data.
predictions_path: Output path for the predictions.
Outputs:
predictions: Predictions in text format.
Annotations:
author: Alexey Volkov <alexey.volkov@ark-kun.com>
inputs:
- {name: data, type: CSV}
- {name: model, type: CatBoostModel}
- {name: label_column, type: Integer, optional: true}
outputs:
- {name: predictions}
metadata:
annotations:
author: Alexey Volkov <alexey.volkov@ark-kun.com>
canonical_location: 'https://raw.githubusercontent.com/Ark-kun/pipeline_components/master/components/CatBoost/Predict_values/from_CSV/component.yaml'
implementation:
container:
image: python:3.7
command:
- sh
- -c
- (PIP_DISABLE_PIP_VERSION_CHECK=1 python3 -m pip install --quiet --no-warn-script-location
'catboost==0.23' || PIP_DISABLE_PIP_VERSION_CHECK=1 python3 -m pip install --quiet
--no-warn-script-location 'catboost==0.23' --user) && "$0" "$@"
- python3
- -u
- -c
- |
def _make_parent_dirs_and_return_path(file_path: str):
import os
os.makedirs(os.path.dirname(file_path), exist_ok=True)
return file_path
def catboost_predict_values(
data_path,
model_path,
predictions_path,
label_column = None,
):
'''Predict values with a CatBoost model.
Args:
data_path: Path for the data in CSV format.
model_path: Path for the trained model in binary CatBoostModel format.
label_column: Column containing the label data.
predictions_path: Output path for the predictions.
Outputs:
predictions: Predictions in text format.
Annotations:
author: Alexey Volkov <alexey.volkov@ark-kun.com>
'''
import tempfile
from catboost import CatBoost, Pool
import numpy
if label_column:
column_descriptions = {label_column: 'Label'}
column_description_path = tempfile.NamedTemporaryFile(delete=False).name
with open(column_description_path, 'w') as column_description_file:
for idx, kind in column_descriptions.items():
column_description_file.write('{}\t{}\n'.format(idx, kind))
else:
column_description_path = None
eval_data = Pool(
data_path,
column_description=column_description_path,
has_header=True,
delimiter=',',
)
model = CatBoost()
model.load_model(model_path)
predictions = model.predict(eval_data, prediction_type='RawFormulaVal')
numpy.savetxt(predictions_path, predictions)
import argparse
_parser = argparse.ArgumentParser(prog='Catboost predict values', description='Predict values with a CatBoost model.\n\n Args:\n data_path: Path for the data in CSV format.\n model_path: Path for the trained model in binary CatBoostModel format.\n label_column: Column containing the label data.\n predictions_path: Output path for the predictions.\n\n Outputs:\n predictions: Predictions in text format.\n\n Annotations:\n author: Alexey Volkov <alexey.volkov@ark-kun.com>')
_parser.add_argument("--data", dest="data_path", type=str, required=True, default=argparse.SUPPRESS)
_parser.add_argument("--model", dest="model_path", type=str, required=True, default=argparse.SUPPRESS)
_parser.add_argument("--label-column", dest="label_column", type=int, required=False, default=argparse.SUPPRESS)
_parser.add_argument("--predictions", dest="predictions_path", type=_make_parent_dirs_and_return_path, required=True, default=argparse.SUPPRESS)
_parsed_args = vars(_parser.parse_args())
_outputs = catboost_predict_values(**_parsed_args)
args:
- --data
- {inputPath: data}
- --model
- {inputPath: model}
- if:
cond: {isPresent: label_column}
then:
- --label-column
- {inputValue: label_column}
- --predictions
- {outputPath: predictions}

@@ -1,3 +0,0 @@
# Deprecation Warning
The components in this directory have been moved to [components/contrib/CatBoost](https://github.com/kubeflow/pipelines/tree/master/components/contrib/CatBoost). This directory will be removed by the end of 2021.
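Pipelines that still load one of these components from the old path only need the URL updated. A minimal sketch, assuming the KFP v1 SDK and that each moved component keeps its per-component layout under components/contrib/CatBoost (the URL and ref below are illustrative; pin a specific commit for reproducible pipelines):

from kfp import components

# Hypothetical URL reflecting the new contrib layout described above;
# replace 'master' with a pinned commit SHA in real pipelines.
catboost_train_classifier_op = components.load_component_from_url(
    'https://raw.githubusercontent.com/kubeflow/pipelines/master/components/contrib/CatBoost/Train_classifier/from_CSV/component.yaml'
)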

@@ -1,97 +0,0 @@
from kfp.components import InputPath, OutputPath, create_component_from_func
def catboost_train_classifier(
training_data_path: InputPath('CSV'),
model_path: OutputPath('CatBoostModel'),
starting_model_path: InputPath('CatBoostModel') = None,
label_column: int = 0,
loss_function: str = 'Logloss',
num_iterations: int = 500,
learning_rate: float = None,
depth: int = 6,
random_seed: int = 0,
cat_features: list = None,
text_features: list = None,
additional_training_options: dict = {},
):
'''Train a CatBoost classifier model.
Args:
training_data_path: Path for the training data in CSV format.
model_path: Output path for the trained model in binary CatBoostModel format.
starting_model_path: Path for the existing trained model to start from.
label_column: Column containing the label data.
loss_function: The metric to use in training; it also selects the machine learning
problem to solve. Default = 'Logloss'
num_iterations: Number of trees to add to the ensemble.
learning_rate: Step size shrinkage used in updates to prevent overfitting.
The default value is selected automatically for binary classification when other parameters are set to default.
In all other cases the default is 0.03.
depth: Depth of a tree. All trees are the same depth. Default = 6
random_seed: Random number seed. Default = 0
cat_features: A list of Categorical features (indices or names).
text_features: A list of Text features (indices or names).
additional_training_options: A dictionary with additional options to pass to CatBoostClassifier
Outputs:
model: Trained model in binary CatBoostModel format.
Annotations:
author: Alexey Volkov <alexey.volkov@ark-kun.com>
'''
import tempfile
from pathlib import Path
from catboost import CatBoostClassifier, Pool
column_descriptions = {label_column: 'Label'}
column_description_path = tempfile.NamedTemporaryFile(delete=False).name
with open(column_description_path, 'w') as column_description_file:
for idx, kind in column_descriptions.items():
column_description_file.write('{}\t{}\n'.format(idx, kind))
train_data = Pool(
training_data_path,
column_description=column_description_path,
has_header=True,
delimiter=',',
)
model = CatBoostClassifier(
iterations=num_iterations,
depth=depth,
learning_rate=learning_rate,
loss_function=loss_function,
random_seed=random_seed,
verbose=True,
**additional_training_options,
)
model.fit(
train_data,
cat_features=cat_features,
text_features=text_features,
init_model=starting_model_path,
#verbose=False,
#plot=True,
)
Path(model_path).parent.mkdir(parents=True, exist_ok=True)
model.save_model(model_path)
if __name__ == '__main__':
catboost_train_classifier_op = create_component_from_func(
catboost_train_classifier,
output_component_file='component.yaml',
base_image='python:3.7',
packages_to_install=['catboost==0.23'],
annotations={
"author": "Alexey Volkov <alexey.volkov@ark-kun.com>",
"canonical_location": "https://raw.githubusercontent.com/Ark-kun/pipeline_components/master/components/CatBoost/Train_classifier/from_CSV/component.yaml",
},
)

@@ -1,220 +0,0 @@
name: Catboost train classifier
description: |-
Train a CatBoost classifier model.
Args:
training_data_path: Path for the training data in CSV format.
model_path: Output path for the trained model in binary CatBoostModel format.
starting_model_path: Path for the existing trained model to start from.
label_column: Column containing the label data.
loss_function: The metric to use in training; it also selects the machine learning
problem to solve. Default = 'Logloss'
num_iterations: Number of trees to add to the ensemble.
learning_rate: Step size shrinkage used in updates to prevent overfitting.
The default value is selected automatically for binary classification when other parameters are set to default.
In all other cases the default is 0.03.
depth: Depth of a tree. All trees are the same depth. Default = 6
random_seed: Random number seed. Default = 0
cat_features: A list of Categorical features (indices or names).
text_features: A list of Text features (indices or names).
additional_training_options: A dictionary with additional options to pass to CatBoostClassifier
Outputs:
model: Trained model in binary CatBoostModel format.
Annotations:
author: Alexey Volkov <alexey.volkov@ark-kun.com>
inputs:
- {name: training_data, type: CSV}
- {name: starting_model, type: CatBoostModel, optional: true}
- {name: label_column, type: Integer, default: '0', optional: true}
- {name: loss_function, type: String, default: Logloss, optional: true}
- {name: num_iterations, type: Integer, default: '500', optional: true}
- {name: learning_rate, type: Float, optional: true}
- {name: depth, type: Integer, default: '6', optional: true}
- {name: random_seed, type: Integer, default: '0', optional: true}
- {name: cat_features, type: JsonArray, optional: true}
- {name: text_features, type: JsonArray, optional: true}
- {name: additional_training_options, type: JsonObject, default: '{}', optional: true}
outputs:
- {name: model, type: CatBoostModel}
metadata:
annotations:
author: Alexey Volkov <alexey.volkov@ark-kun.com>
canonical_location: 'https://raw.githubusercontent.com/Ark-kun/pipeline_components/master/components/CatBoost/Train_classifier/from_CSV/component.yaml'
implementation:
container:
image: python:3.7
command:
- sh
- -c
- (PIP_DISABLE_PIP_VERSION_CHECK=1 python3 -m pip install --quiet --no-warn-script-location
'catboost==0.23' || PIP_DISABLE_PIP_VERSION_CHECK=1 python3 -m pip install --quiet
--no-warn-script-location 'catboost==0.23' --user) && "$0" "$@"
- python3
- -u
- -c
- |
def _make_parent_dirs_and_return_path(file_path: str):
import os
os.makedirs(os.path.dirname(file_path), exist_ok=True)
return file_path
def catboost_train_classifier(
training_data_path,
model_path,
starting_model_path = None,
label_column = 0,
loss_function = 'Logloss',
num_iterations = 500,
learning_rate = None,
depth = 6,
random_seed = 0,
cat_features = None,
text_features = None,
additional_training_options = {},
):
'''Train a CatBoost classifier model.
Args:
training_data_path: Path for the training data in CSV format.
model_path: Output path for the trained model in binary CatBoostModel format.
starting_model_path: Path for the existing trained model to start from.
label_column: Column containing the label data.
loss_function: The metric to use in training; it also selects the machine learning
problem to solve. Default = 'Logloss'
num_iterations: Number of trees to add to the ensemble.
learning_rate: Step size shrinkage used in updates to prevent overfitting.
The default value is selected automatically for binary classification when other parameters are set to default.
In all other cases the default is 0.03.
depth: Depth of a tree. All trees are the same depth. Default = 6
random_seed: Random number seed. Default = 0
cat_features: A list of Categorical features (indices or names).
text_features: A list of Text features (indices or names).
additional_training_options: A dictionary with additional options to pass to CatBoostClassifier
Outputs:
model: Trained model in binary CatBoostModel format.
Annotations:
author: Alexey Volkov <alexey.volkov@ark-kun.com>
'''
import tempfile
from pathlib import Path
from catboost import CatBoostClassifier, Pool
column_descriptions = {label_column: 'Label'}
column_description_path = tempfile.NamedTemporaryFile(delete=False).name
with open(column_description_path, 'w') as column_description_file:
for idx, kind in column_descriptions.items():
column_description_file.write('{}\t{}\n'.format(idx, kind))
train_data = Pool(
training_data_path,
column_description=column_description_path,
has_header=True,
delimiter=',',
)
model = CatBoostClassifier(
iterations=num_iterations,
depth=depth,
learning_rate=learning_rate,
loss_function=loss_function,
random_seed=random_seed,
verbose=True,
**additional_training_options,
)
model.fit(
train_data,
cat_features=cat_features,
text_features=text_features,
init_model=starting_model_path,
#verbose=False,
#plot=True,
)
Path(model_path).parent.mkdir(parents=True, exist_ok=True)
model.save_model(model_path)
import json
import argparse
_parser = argparse.ArgumentParser(prog='Catboost train classifier', description="Train a CatBoost classifier model.\n\n    Args:\n        training_data_path: Path for the training data in CSV format.\n        model_path: Output path for the trained model in binary CatBoostModel format.\n        starting_model_path: Path for the existing trained model to start from.\n        label_column: Column containing the label data.\n\n        loss_function: The metric to use in training; it also selects the machine learning\n            problem to solve. Default = 'Logloss'\n        num_iterations: Number of trees to add to the ensemble.\n        learning_rate: Step size shrinkage used in updates to prevent overfitting.\n            The default value is selected automatically for binary classification when other parameters are set to default.\n            In all other cases the default is 0.03.\n        depth: Depth of a tree. All trees are the same depth. Default = 6\n        random_seed: Random number seed. Default = 0\n\n        cat_features: A list of Categorical features (indices or names).\n        text_features: A list of Text features (indices or names).\n        additional_training_options: A dictionary with additional options to pass to CatBoostClassifier\n\n    Outputs:\n        model: Trained model in binary CatBoostModel format.\n\n    Annotations:\n        author: Alexey Volkov <alexey.volkov@ark-kun.com>")
_parser.add_argument("--training-data", dest="training_data_path", type=str, required=True, default=argparse.SUPPRESS)
_parser.add_argument("--starting-model", dest="starting_model_path", type=str, required=False, default=argparse.SUPPRESS)
_parser.add_argument("--label-column", dest="label_column", type=int, required=False, default=argparse.SUPPRESS)
_parser.add_argument("--loss-function", dest="loss_function", type=str, required=False, default=argparse.SUPPRESS)
_parser.add_argument("--num-iterations", dest="num_iterations", type=int, required=False, default=argparse.SUPPRESS)
_parser.add_argument("--learning-rate", dest="learning_rate", type=float, required=False, default=argparse.SUPPRESS)
_parser.add_argument("--depth", dest="depth", type=int, required=False, default=argparse.SUPPRESS)
_parser.add_argument("--random-seed", dest="random_seed", type=int, required=False, default=argparse.SUPPRESS)
_parser.add_argument("--cat-features", dest="cat_features", type=json.loads, required=False, default=argparse.SUPPRESS)
_parser.add_argument("--text-features", dest="text_features", type=json.loads, required=False, default=argparse.SUPPRESS)
_parser.add_argument("--additional-training-options", dest="additional_training_options", type=json.loads, required=False, default=argparse.SUPPRESS)
_parser.add_argument("--model", dest="model_path", type=_make_parent_dirs_and_return_path, required=True, default=argparse.SUPPRESS)
_parsed_args = vars(_parser.parse_args())
_outputs = catboost_train_classifier(**_parsed_args)
args:
- --training-data
- {inputPath: training_data}
- if:
cond: {isPresent: starting_model}
then:
- --starting-model
- {inputPath: starting_model}
- if:
cond: {isPresent: label_column}
then:
- --label-column
- {inputValue: label_column}
- if:
cond: {isPresent: loss_function}
then:
- --loss-function
- {inputValue: loss_function}
- if:
cond: {isPresent: num_iterations}
then:
- --num-iterations
- {inputValue: num_iterations}
- if:
cond: {isPresent: learning_rate}
then:
- --learning-rate
- {inputValue: learning_rate}
- if:
cond: {isPresent: depth}
then:
- --depth
- {inputValue: depth}
- if:
cond: {isPresent: random_seed}
then:
- --random-seed
- {inputValue: random_seed}
- if:
cond: {isPresent: cat_features}
then:
- --cat-features
- {inputValue: cat_features}
- if:
cond: {isPresent: text_features}
then:
- --text-features
- {inputValue: text_features}
- if:
cond: {isPresent: additional_training_options}
then:
- --additional-training-options
- {inputValue: additional_training_options}
- --model
- {outputPath: model}

@@ -1,95 +0,0 @@
from kfp.components import InputPath, OutputPath, create_component_from_func
def catboost_train_regression(
training_data_path: InputPath('CSV'),
model_path: OutputPath('CatBoostModel'),
starting_model_path: InputPath('CatBoostModel') = None,
label_column: int = 0,
loss_function: str = 'RMSE',
num_iterations: int = 500,
learning_rate: float = None,
depth: int = 6,
random_seed: int = 0,
cat_features: list = None,
additional_training_options: dict = {},
):
'''Train a CatBoost regressor model.
Args:
training_data_path: Path for the training data in CSV format.
model_path: Output path for the trained model in binary CatBoostModel format.
starting_model_path: Path for the existing trained model to start from.
label_column: Column containing the label data.
loss_function: The metric to use in training; it also selects the machine learning
problem to solve. Default = 'RMSE'. Possible values:
'RMSE', 'MAE', 'Quantile:alpha=value', 'LogLinQuantile:alpha=value', 'Poisson', 'MAPE', 'Lq:q=value'
num_iterations: Number of trees to add to the ensemble.
learning_rate: Step size shrinkage used in updates to prevent overfitting.
The default value is selected automatically for binary classification when other parameters are set to default.
In all other cases the default is 0.03.
depth: Depth of a tree. All trees are the same depth. Default = 6
random_seed: Random number seed. Default = 0
cat_features: A list of Categorical features (indices or names).
additional_training_options: A dictionary with additional options to pass to CatBoostRegressor
Outputs:
model: Trained model in binary CatBoostModel format.
Annotations:
author: Alexey Volkov <alexey.volkov@ark-kun.com>
'''
import tempfile
from pathlib import Path
from catboost import CatBoostRegressor, Pool
column_descriptions = {label_column: 'Label'}
column_description_path = tempfile.NamedTemporaryFile(delete=False).name
with open(column_description_path, 'w') as column_description_file:
for idx, kind in column_descriptions.items():
column_description_file.write('{}\t{}\n'.format(idx, kind))
train_data = Pool(
training_data_path,
column_description=column_description_path,
has_header=True,
delimiter=',',
)
model = CatBoostRegressor(
iterations=num_iterations,
depth=depth,
learning_rate=learning_rate,
loss_function=loss_function,
random_seed=random_seed,
verbose=True,
**additional_training_options,
)
model.fit(
train_data,
cat_features=cat_features,
init_model=starting_model_path,
#verbose=False,
#plot=True,
)
Path(model_path).parent.mkdir(parents=True, exist_ok=True)
model.save_model(model_path)
if __name__ == '__main__':
catboost_train_regression_op = create_component_from_func(
catboost_train_regression,
output_component_file='component.yaml',
base_image='python:3.7',
packages_to_install=['catboost==0.23'],
annotations={
"author": "Alexey Volkov <alexey.volkov@ark-kun.com>",
"canonical_location": "https://raw.githubusercontent.com/Ark-kun/pipeline_components/master/components/CatBoost/Train_regression/from_CSV/component.yaml",
},
)

@@ -1,211 +0,0 @@
name: Catboost train regression
description: |-
Train a CatBoost regressor model.
Args:
training_data_path: Path for the training data in CSV format.
model_path: Output path for the trained model in binary CatBoostModel format.
starting_model_path: Path for the existing trained model to start from.
label_column: Column containing the label data.
loss_function: The metric to use in training; it also selects the machine learning
problem to solve. Default = 'RMSE'. Possible values:
'RMSE', 'MAE', 'Quantile:alpha=value', 'LogLinQuantile:alpha=value', 'Poisson', 'MAPE', 'Lq:q=value'
num_iterations: Number of trees to add to the ensemble.
learning_rate: Step size shrinkage used in updates to prevent overfitting.
The default value is selected automatically for binary classification when other parameters are set to default.
In all other cases the default is 0.03.
depth: Depth of a tree. All trees are the same depth. Default = 6
random_seed: Random number seed. Default = 0
cat_features: A list of Categorical features (indices or names).
additional_training_options: A dictionary with additional options to pass to CatBoostRegressor
Outputs:
model: Trained model in binary CatBoostModel format.
Annotations:
author: Alexey Volkov <alexey.volkov@ark-kun.com>
inputs:
- {name: training_data, type: CSV}
- {name: starting_model, type: CatBoostModel, optional: true}
- {name: label_column, type: Integer, default: '0', optional: true}
- {name: loss_function, type: String, default: RMSE, optional: true}
- {name: num_iterations, type: Integer, default: '500', optional: true}
- {name: learning_rate, type: Float, optional: true}
- {name: depth, type: Integer, default: '6', optional: true}
- {name: random_seed, type: Integer, default: '0', optional: true}
- {name: cat_features, type: JsonArray, optional: true}
- {name: additional_training_options, type: JsonObject, default: '{}', optional: true}
outputs:
- {name: model, type: CatBoostModel}
metadata:
annotations:
author: Alexey Volkov <alexey.volkov@ark-kun.com>
canonical_location: 'https://raw.githubusercontent.com/Ark-kun/pipeline_components/master/components/CatBoost/Train_regression/from_CSV/component.yaml'
implementation:
container:
image: python:3.7
command:
- sh
- -c
- (PIP_DISABLE_PIP_VERSION_CHECK=1 python3 -m pip install --quiet --no-warn-script-location
'catboost==0.23' || PIP_DISABLE_PIP_VERSION_CHECK=1 python3 -m pip install --quiet
--no-warn-script-location 'catboost==0.23' --user) && "$0" "$@"
- python3
- -u
- -c
- |
def _make_parent_dirs_and_return_path(file_path: str):
import os
os.makedirs(os.path.dirname(file_path), exist_ok=True)
return file_path
def catboost_train_regression(
training_data_path,
model_path,
starting_model_path = None,
label_column = 0,
loss_function = 'RMSE',
num_iterations = 500,
learning_rate = None,
depth = 6,
random_seed = 0,
cat_features = None,
additional_training_options = {},
):
'''Train a CatBoost regressor model.
Args:
training_data_path: Path for the training data in CSV format.
model_path: Output path for the trained model in binary CatBoostModel format.
starting_model_path: Path for the existing trained model to start from.
label_column: Column containing the label data.
loss_function: The metric to use in training; it also selects the machine learning
problem to solve. Default = 'RMSE'. Possible values:
'RMSE', 'MAE', 'Quantile:alpha=value', 'LogLinQuantile:alpha=value', 'Poisson', 'MAPE', 'Lq:q=value'
num_iterations: Number of trees to add to the ensemble.
learning_rate: Step size shrinkage used in updates to prevent overfitting.
The default value is selected automatically for binary classification when other parameters are set to default.
In all other cases the default is 0.03.
depth: Depth of a tree. All trees are the same depth. Default = 6
random_seed: Random number seed. Default = 0
cat_features: A list of Categorical features (indices or names).
additional_training_options: A dictionary with additional options to pass to CatBoostRegressor
Outputs:
model: Trained model in binary CatBoostModel format.
Annotations:
author: Alexey Volkov <alexey.volkov@ark-kun.com>
'''
import tempfile
from pathlib import Path
from catboost import CatBoostRegressor, Pool
column_descriptions = {label_column: 'Label'}
column_description_path = tempfile.NamedTemporaryFile(delete=False).name
with open(column_description_path, 'w') as column_description_file:
for idx, kind in column_descriptions.items():
column_description_file.write('{}\t{}\n'.format(idx, kind))
train_data = Pool(
training_data_path,
column_description=column_description_path,
has_header=True,
delimiter=',',
)
model = CatBoostRegressor(
iterations=num_iterations,
depth=depth,
learning_rate=learning_rate,
loss_function=loss_function,
random_seed=random_seed,
verbose=True,
**additional_training_options,
)
model.fit(
train_data,
cat_features=cat_features,
init_model=starting_model_path,
#verbose=False,
#plot=True,
)
Path(model_path).parent.mkdir(parents=True, exist_ok=True)
model.save_model(model_path)
import json
import argparse
_parser = argparse.ArgumentParser(prog='Catboost train regression', description="Train a CatBoost regressor model.\n\n    Args:\n        training_data_path: Path for the training data in CSV format.\n        model_path: Output path for the trained model in binary CatBoostModel format.\n        starting_model_path: Path for the existing trained model to start from.\n        label_column: Column containing the label data.\n\n        loss_function: The metric to use in training; it also selects the machine learning\n            problem to solve. Default = 'RMSE'. Possible values:\n            'RMSE', 'MAE', 'Quantile:alpha=value', 'LogLinQuantile:alpha=value', 'Poisson', 'MAPE', 'Lq:q=value'\n        num_iterations: Number of trees to add to the ensemble.\n        learning_rate: Step size shrinkage used in updates to prevent overfitting.\n            The default value is selected automatically for binary classification when other parameters are set to default.\n            In all other cases the default is 0.03.\n        depth: Depth of a tree. All trees are the same depth. Default = 6\n        random_seed: Random number seed. Default = 0\n\n        cat_features: A list of Categorical features (indices or names).\n        additional_training_options: A dictionary with additional options to pass to CatBoostRegressor\n\n    Outputs:\n        model: Trained model in binary CatBoostModel format.\n\n    Annotations:\n        author: Alexey Volkov <alexey.volkov@ark-kun.com>")
_parser.add_argument("--training-data", dest="training_data_path", type=str, required=True, default=argparse.SUPPRESS)
_parser.add_argument("--starting-model", dest="starting_model_path", type=str, required=False, default=argparse.SUPPRESS)
_parser.add_argument("--label-column", dest="label_column", type=int, required=False, default=argparse.SUPPRESS)
_parser.add_argument("--loss-function", dest="loss_function", type=str, required=False, default=argparse.SUPPRESS)
_parser.add_argument("--num-iterations", dest="num_iterations", type=int, required=False, default=argparse.SUPPRESS)
_parser.add_argument("--learning-rate", dest="learning_rate", type=float, required=False, default=argparse.SUPPRESS)
_parser.add_argument("--depth", dest="depth", type=int, required=False, default=argparse.SUPPRESS)
_parser.add_argument("--random-seed", dest="random_seed", type=int, required=False, default=argparse.SUPPRESS)
_parser.add_argument("--cat-features", dest="cat_features", type=json.loads, required=False, default=argparse.SUPPRESS)
_parser.add_argument("--additional-training-options", dest="additional_training_options", type=json.loads, required=False, default=argparse.SUPPRESS)
_parser.add_argument("--model", dest="model_path", type=_make_parent_dirs_and_return_path, required=True, default=argparse.SUPPRESS)
_parsed_args = vars(_parser.parse_args())
_outputs = catboost_train_regression(**_parsed_args)
args:
- --training-data
- {inputPath: training_data}
- if:
cond: {isPresent: starting_model}
then:
- --starting-model
- {inputPath: starting_model}
- if:
cond: {isPresent: label_column}
then:
- --label-column
- {inputValue: label_column}
- if:
cond: {isPresent: loss_function}
then:
- --loss-function
- {inputValue: loss_function}
- if:
cond: {isPresent: num_iterations}
then:
- --num-iterations
- {inputValue: num_iterations}
- if:
cond: {isPresent: learning_rate}
then:
- --learning-rate
- {inputValue: learning_rate}
- if:
cond: {isPresent: depth}
then:
- --depth
- {inputValue: depth}
- if:
cond: {isPresent: random_seed}
then:
- --random-seed
- {inputValue: random_seed}
- if:
cond: {isPresent: cat_features}
then:
- --cat-features
- {inputValue: cat_features}
- if:
cond: {isPresent: additional_training_options}
then:
- --additional-training-options
- {inputValue: additional_training_options}
- --model
- {outputPath: model}

@@ -1,76 +0,0 @@
import kfp
from kfp import components
chicago_taxi_dataset_op = components.load_component_from_url('https://raw.githubusercontent.com/kubeflow/pipelines/e3337b8bdcd63636934954e592d4b32c95b49129/components/datasets/Chicago%20Taxi/component.yaml')
pandas_transform_csv_op = components.load_component_from_url('https://raw.githubusercontent.com/kubeflow/pipelines/e69a6694/components/pandas/Transform_DataFrame/in_CSV_format/component.yaml')
catboost_train_classifier_op = components.load_component_from_url('https://raw.githubusercontent.com/kubeflow/pipelines/f97ad2/components/CatBoost/Train_classifier/from_CSV/component.yaml')
catboost_train_regression_op = components.load_component_from_url('https://raw.githubusercontent.com/kubeflow/pipelines/f97ad2/components/CatBoost/Train_regression/from_CSV/component.yaml')
catboost_predict_classes_op = components.load_component_from_url('https://raw.githubusercontent.com/kubeflow/pipelines/f97ad2/components/CatBoost/Predict_classes/from_CSV/component.yaml')
catboost_predict_values_op = components.load_component_from_url('https://raw.githubusercontent.com/kubeflow/pipelines/f97ad2/components/CatBoost/Predict_values/from_CSV/component.yaml')
catboost_predict_class_probabilities_op = components.load_component_from_url('https://raw.githubusercontent.com/kubeflow/pipelines/f97ad2/components/CatBoost/Predict_class_probabilities/from_CSV/component.yaml')
catboost_to_apple_op = components.load_component_from_url('https://raw.githubusercontent.com/kubeflow/pipelines/f97ad2/components/CatBoost/convert_CatBoostModel_to_AppleCoreMLModel/component.yaml')
catboost_to_onnx_op = components.load_component_from_url('https://raw.githubusercontent.com/kubeflow/pipelines/f97ad2/components/CatBoost/convert_CatBoostModel_to_ONNX/component.yaml')
def catboost_pipeline():
training_data_in_csv = chicago_taxi_dataset_op(
where='trip_start_timestamp >= "2019-01-01" AND trip_start_timestamp < "2019-02-01"',
select='tips,trip_seconds,trip_miles,pickup_community_area,dropoff_community_area,fare,tolls,extras,trip_total',
limit=10000,
).output
training_data_for_classification_in_csv = pandas_transform_csv_op(
table=training_data_in_csv,
transform_code='''df.insert(0, "was_tipped", df["tips"] > 0); del df["tips"]''',
).output
catboost_train_regression_task = catboost_train_regression_op(
training_data=training_data_in_csv,
loss_function='RMSE',
label_column=0,
num_iterations=200,
)
regression_model = catboost_train_regression_task.outputs['model']
catboost_train_classifier_task = catboost_train_classifier_op(
training_data=training_data_for_classification_in_csv,
label_column=0,
num_iterations=200,
)
classification_model = catboost_train_classifier_task.outputs['model']
evaluation_data_for_regression_in_csv = training_data_in_csv
evaluation_data_for_classification_in_csv = training_data_for_classification_in_csv
catboost_predict_values_op(
data=evaluation_data_for_regression_in_csv,
model=regression_model,
label_column=0,
)
catboost_predict_classes_op(
data=evaluation_data_for_classification_in_csv,
model=classification_model,
label_column=0,
)
catboost_predict_class_probabilities_op(
data=evaluation_data_for_classification_in_csv,
model=classification_model,
label_column=0,
)
catboost_to_apple_op(regression_model)
catboost_to_apple_op(classification_model)
catboost_to_onnx_op(regression_model)
catboost_to_onnx_op(classification_model)
if __name__ == '__main__':
kfp_endpoint=None
kfp.Client(host=kfp_endpoint).create_run_from_pipeline_func(catboost_pipeline, arguments={})

@@ -1,41 +0,0 @@
from kfp.components import InputPath, OutputPath, create_component_from_func
def convert_CatBoostModel_to_AppleCoreMLModel(
model_path: InputPath('CatBoostModel'),
converted_model_path: OutputPath('AppleCoreMLModel'),
):
'''Convert CatBoost model to Apple CoreML format.
Args:
model_path: Path of a trained model in binary CatBoost model format.
converted_model_path: Output path for the converted model.
Outputs:
converted_model: Model in Apple CoreML format.
Annotations:
author: Alexey Volkov <alexey.volkov@ark-kun.com>
'''
from catboost import CatBoost
model = CatBoost()
model.load_model(model_path)
model.save_model(
converted_model_path,
format="coreml",
# export_parameters={'prediction_type': 'probability'},
# export_parameters={'prediction_type': 'raw'},
)
if __name__ == '__main__':
create_component_from_func(
convert_CatBoostModel_to_AppleCoreMLModel,
output_component_file='component.yaml',
base_image='python:3.7',
packages_to_install=['catboost==0.22'],
annotations={
"author": "Alexey Volkov <alexey.volkov@ark-kun.com>",
"canonical_location": "https://raw.githubusercontent.com/Ark-kun/pipeline_components/master/components/CatBoost/convert_CatBoostModel_to_AppleCoreMLModel/component.yaml",
},
)

@@ -1,78 +0,0 @@
name: Convert CatBoostModel to AppleCoreMLModel
description: |-
Convert CatBoost model to Apple CoreML format.
Args:
model_path: Path of a trained model in binary CatBoost model format.
converted_model_path: Output path for the converted model.
Outputs:
converted_model: Model in Apple CoreML format.
Annotations:
author: Alexey Volkov <alexey.volkov@ark-kun.com>
inputs:
- {name: model, type: CatBoostModel}
outputs:
- {name: converted_model, type: AppleCoreMLModel}
metadata:
annotations:
author: Alexey Volkov <alexey.volkov@ark-kun.com>
canonical_location: 'https://raw.githubusercontent.com/Ark-kun/pipeline_components/master/components/CatBoost/convert_CatBoostModel_to_AppleCoreMLModel/component.yaml'
implementation:
container:
image: python:3.7
command:
- sh
- -c
- (PIP_DISABLE_PIP_VERSION_CHECK=1 python3 -m pip install --quiet --no-warn-script-location
'catboost==0.22' || PIP_DISABLE_PIP_VERSION_CHECK=1 python3 -m pip install --quiet
--no-warn-script-location 'catboost==0.22' --user) && "$0" "$@"
- python3
- -u
- -c
- |
def _make_parent_dirs_and_return_path(file_path: str):
import os
os.makedirs(os.path.dirname(file_path), exist_ok=True)
return file_path
def convert_CatBoostModel_to_AppleCoreMLModel(
model_path,
converted_model_path,
):
'''Convert CatBoost model to Apple CoreML format.
Args:
model_path: Path of a trained model in binary CatBoost model format.
converted_model_path: Output path for the converted model.
Outputs:
converted_model: Model in Apple CoreML format.
Annotations:
author: Alexey Volkov <alexey.volkov@ark-kun.com>
'''
from catboost import CatBoost
model = CatBoost()
model.load_model(model_path)
model.save_model(
converted_model_path,
format="coreml",
# export_parameters={'prediction_type': 'probability'},
# export_parameters={'prediction_type': 'raw'},
)
import argparse
_parser = argparse.ArgumentParser(prog='Convert CatBoostModel to AppleCoreMLModel', description='Convert CatBoost model to Apple CoreML format.\n\n Args:\n model_path: Path of a trained model in binary CatBoost model format.\n converted_model_path: Output path for the converted model.\n\n Outputs:\n converted_model: Model in Apple CoreML format.\n\n Annotations:\n author: Alexey Volkov <alexey.volkov@ark-kun.com>')
_parser.add_argument("--model", dest="model_path", type=str, required=True, default=argparse.SUPPRESS)
_parser.add_argument("--converted-model", dest="converted_model_path", type=_make_parent_dirs_and_return_path, required=True, default=argparse.SUPPRESS)
_parsed_args = vars(_parser.parse_args())
_outputs = convert_CatBoostModel_to_AppleCoreMLModel(**_parsed_args)
args:
- --model
- {inputPath: model}
- --converted-model
- {outputPath: converted_model}

@@ -1,36 +0,0 @@
from kfp.components import InputPath, OutputPath, create_component_from_func
def convert_CatBoostModel_to_ONNX(
model_path: InputPath('CatBoostModel'),
converted_model_path: OutputPath('ONNX'),
):
'''Convert CatBoost model to ONNX format.
Args:
model_path: Path of a trained model in binary CatBoost model format.
converted_model_path: Output path for the converted model.
Outputs:
converted_model: Model in ONNX format.
Annotations:
author: Alexey Volkov <alexey.volkov@ark-kun.com>
'''
from catboost import CatBoost
model = CatBoost()
model.load_model(model_path)
model.save_model(converted_model_path, format="onnx")
if __name__ == '__main__':
create_component_from_func(
convert_CatBoostModel_to_ONNX,
output_component_file='component.yaml',
base_image='python:3.7',
packages_to_install=['catboost==0.22'],
annotations={
"author": "Alexey Volkov <alexey.volkov@ark-kun.com>",
"canonical_location": "https://raw.githubusercontent.com/Ark-kun/pipeline_components/master/components/CatBoost/convert_CatBoostModel_to_ONNX/component.yaml",
},
)

@@ -1,73 +0,0 @@
name: Convert CatBoostModel to ONNX
description: |-
Convert CatBoost model to ONNX format.
Args:
model_path: Path of a trained model in binary CatBoost model format.
converted_model_path: Output path for the converted model.
Outputs:
converted_model: Model in ONNX format.
Annotations:
author: Alexey Volkov <alexey.volkov@ark-kun.com>
inputs:
- {name: model, type: CatBoostModel}
outputs:
- {name: converted_model, type: ONNX}
metadata:
annotations:
author: Alexey Volkov <alexey.volkov@ark-kun.com>
canonical_location: 'https://raw.githubusercontent.com/Ark-kun/pipeline_components/master/components/CatBoost/convert_CatBoostModel_to_ONNX/component.yaml'
implementation:
container:
image: python:3.7
command:
- sh
- -c
- (PIP_DISABLE_PIP_VERSION_CHECK=1 python3 -m pip install --quiet --no-warn-script-location
'catboost==0.22' || PIP_DISABLE_PIP_VERSION_CHECK=1 python3 -m pip install --quiet
--no-warn-script-location 'catboost==0.22' --user) && "$0" "$@"
- python3
- -u
- -c
- |
def _make_parent_dirs_and_return_path(file_path: str):
import os
os.makedirs(os.path.dirname(file_path), exist_ok=True)
return file_path
def convert_CatBoostModel_to_ONNX(
model_path,
converted_model_path,
):
'''Convert CatBoost model to ONNX format.
Args:
model_path: Path of a trained model in binary CatBoost model format.
converted_model_path: Output path for the converted model.
Outputs:
converted_model: Model in ONNX format.
Annotations:
author: Alexey Volkov <alexey.volkov@ark-kun.com>
'''
from catboost import CatBoost
model = CatBoost()
model.load_model(model_path)
model.save_model(converted_model_path, format="onnx")
import argparse
_parser = argparse.ArgumentParser(prog='Convert CatBoostModel to ONNX', description='Convert CatBoost model to ONNX format.\n\n Args:\n model_path: Path of a trained model in binary CatBoost model format.\n converted_model_path: Output path for the converted model.\n\n Outputs:\n converted_model: Model in ONNX format.\n\n Annotations:\n author: Alexey Volkov <alexey.volkov@ark-kun.com>')
_parser.add_argument("--model", dest="model_path", type=str, required=True, default=argparse.SUPPRESS)
_parser.add_argument("--converted-model", dest="converted_model_path", type=_make_parent_dirs_and_return_path, required=True, default=argparse.SUPPRESS)
_parsed_args = vars(_parser.parse_args())
_outputs = convert_CatBoostModel_to_ONNX(**_parsed_args)
args:
- --model
- {inputPath: model}
- --converted-model
- {outputPath: converted_model}

@@ -5,333 +5,3 @@ Components are the building blocks of pipelines.
Component definition files describe a component's interface (inputs and outputs) and implementation (how to call the containerized program).
Users can load components with the KFP SDK, instantiate them, and compose them into a pipeline graph, as sketched below.
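A minimal sketch of that flow, assuming the KFP v1 SDK; the component URLs are reused from the sample pipeline above, and the two-step pipeline itself is purely illustrative:

import kfp
from kfp import components

# Loading a component.yaml yields a Python factory function for pipeline tasks.
dataset_op = components.load_component_from_url('https://raw.githubusercontent.com/kubeflow/pipelines/e3337b8bdcd63636934954e592d4b32c95b49129/components/datasets/Chicago%20Taxi/component.yaml')
train_op = components.load_component_from_url('https://raw.githubusercontent.com/kubeflow/pipelines/f97ad2/components/CatBoost/Train_regression/from_CSV/component.yaml')

def tiny_pipeline():
    # Instantiating a component creates a task; wiring one task's output into
    # another task's input defines an edge in the pipeline graph.
    training_data = dataset_op(
        select='tips,trip_seconds,trip_miles,fare,trip_total',
        limit=1000,
    ).output
    train_op(training_data=training_data, label_column=0, num_iterations=10)

if __name__ == '__main__':
    kfp.Client(host=None).create_run_from_pipeline_func(tiny_pipeline, arguments={})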
## Example components
* Python-based: [Train with XGBoost](https://github.com/kubeflow/pipelines/tree/ea94251143f300fafed8950a1b4ba0a6b6065094/components/XGBoost/Train)
* Command-line-based: [Execute notebook](https://github.com/kubeflow/pipelines/blob/329ed48/components/notebooks/Run_notebook_using_papermill/component.yaml)
* Graph-based: [Cross-validate](https://github.com/kubeflow/pipelines/tree/34cb59daaea4f800afae5d968b5efd31eb432291/components/XGBoost/Cross_validation_for_regression/from_CSV)
See how to [build your own components](https://www.kubeflow.org/docs/pipelines/sdk/component-development/).
Also see the tutorials for [data passing for components based on python functions](https://github.com/kubeflow/pipelines/blob/fd5778d/samples/tutorials/Data%20passing%20in%20python%20components.ipynb) and [components based on command-line programs](https://github.com/Ark-kun/kfp_samples/blob/ae1a5b6/2019-10%20Kubeflow%20summit/106%20-%20Creating%20components%20from%20command-line%20programs/106%20-%20Creating%20components%20from%20command-line%20programs.ipynb).
## Index of components
/ [_converters](https://github.com/kubeflow/pipelines/tree/0795597562e076437a21745e524b5c960b1edb68/components/_converters) / [ApacheParquet](https://github.com/kubeflow/pipelines/tree/0795597562e076437a21745e524b5c960b1edb68/components/_converters/ApacheParquet) / [from_ApacheArrowFeather](https://github.com/kubeflow/pipelines/tree/0795597562e076437a21745e524b5c960b1edb68/components/_converters/ApacheParquet/from_ApacheArrowFeather) / [Convert apache arrow feather to apache parquet](https://raw.githubusercontent.com/kubeflow/pipelines/0795597562e076437a21745e524b5c960b1edb68/components/_converters/ApacheParquet/from_ApacheArrowFeather/component.yaml)
/ [_converters](https://github.com/kubeflow/pipelines/tree/0795597562e076437a21745e524b5c960b1edb68/components/_converters) / [ApacheParquet](https://github.com/kubeflow/pipelines/tree/0795597562e076437a21745e524b5c960b1edb68/components/_converters/ApacheParquet) / [from_CSV](https://github.com/kubeflow/pipelines/tree/0795597562e076437a21745e524b5c960b1edb68/components/_converters/ApacheParquet/from_CSV) / [Convert csv to apache parquet](https://raw.githubusercontent.com/kubeflow/pipelines/0795597562e076437a21745e524b5c960b1edb68/components/_converters/ApacheParquet/from_CSV/component.yaml)
/ [_converters](https://github.com/kubeflow/pipelines/tree/0795597562e076437a21745e524b5c960b1edb68/components/_converters) / [ApacheParquet](https://github.com/kubeflow/pipelines/tree/0795597562e076437a21745e524b5c960b1edb68/components/_converters/ApacheParquet) / [from_TSV](https://github.com/kubeflow/pipelines/tree/0795597562e076437a21745e524b5c960b1edb68/components/_converters/ApacheParquet/from_TSV) / [Convert tsv to apache parquet](https://raw.githubusercontent.com/kubeflow/pipelines/0795597562e076437a21745e524b5c960b1edb68/components/_converters/ApacheParquet/from_TSV/component.yaml)
/ [_converters](https://github.com/kubeflow/pipelines/tree/0795597562e076437a21745e524b5c960b1edb68/components/_converters) / [ApacheParquet](https://github.com/kubeflow/pipelines/tree/0795597562e076437a21745e524b5c960b1edb68/components/_converters/ApacheParquet) / [to_ApacheArrowFeather](https://github.com/kubeflow/pipelines/tree/0795597562e076437a21745e524b5c960b1edb68/components/_converters/ApacheParquet/to_ApacheArrowFeather) / [Convert apache parquet to apache arrow feather](https://raw.githubusercontent.com/kubeflow/pipelines/0795597562e076437a21745e524b5c960b1edb68/components/_converters/ApacheParquet/to_ApacheArrowFeather/component.yaml)
/ [_converters](https://github.com/kubeflow/pipelines/tree/0795597562e076437a21745e524b5c960b1edb68/components/_converters) / [ApacheParquet](https://github.com/kubeflow/pipelines/tree/0795597562e076437a21745e524b5c960b1edb68/components/_converters/ApacheParquet) / [to_CSV](https://github.com/kubeflow/pipelines/tree/0795597562e076437a21745e524b5c960b1edb68/components/_converters/ApacheParquet/to_CSV) / [Convert apache parquet to csv](https://raw.githubusercontent.com/kubeflow/pipelines/0795597562e076437a21745e524b5c960b1edb68/components/_converters/ApacheParquet/to_CSV/component.yaml)
/ [_converters](https://github.com/kubeflow/pipelines/tree/0795597562e076437a21745e524b5c960b1edb68/components/_converters) / [ApacheParquet](https://github.com/kubeflow/pipelines/tree/0795597562e076437a21745e524b5c960b1edb68/components/_converters/ApacheParquet) / [to_TSV](https://github.com/kubeflow/pipelines/tree/0795597562e076437a21745e524b5c960b1edb68/components/_converters/ApacheParquet/to_TSV) / [Convert apache parquet to tsv](https://raw.githubusercontent.com/kubeflow/pipelines/0795597562e076437a21745e524b5c960b1edb68/components/_converters/ApacheParquet/to_TSV/component.yaml)
/ [_converters](https://github.com/kubeflow/pipelines/tree/0795597562e076437a21745e524b5c960b1edb68/components/_converters) / [KerasModelHdf5](https://github.com/kubeflow/pipelines/tree/0795597562e076437a21745e524b5c960b1edb68/components/_converters/KerasModelHdf5) / [to_TensorflowSavedModel](https://github.com/kubeflow/pipelines/tree/0795597562e076437a21745e524b5c960b1edb68/components/_converters/KerasModelHdf5/to_TensorflowSavedModel) / [Keras convert hdf5 model to tf saved model](https://raw.githubusercontent.com/kubeflow/pipelines/0795597562e076437a21745e524b5c960b1edb68/components/_converters/KerasModelHdf5/to_TensorflowSavedModel/component.yaml)
/ [_converters](https://github.com/kubeflow/pipelines/tree/0795597562e076437a21745e524b5c960b1edb68/components/_converters) / [OnnxModel](https://github.com/kubeflow/pipelines/tree/0795597562e076437a21745e524b5c960b1edb68/components/_converters/OnnxModel) / [from_KerasModelHdf5](https://github.com/kubeflow/pipelines/tree/0795597562e076437a21745e524b5c960b1edb68/components/_converters/OnnxModel/from_KerasModelHdf5) / [To ONNX from Keras HDF5 model](https://raw.githubusercontent.com/kubeflow/pipelines/0795597562e076437a21745e524b5c960b1edb68/components/_converters/OnnxModel/from_KerasModelHdf5/component.yaml)
/ [_converters](https://github.com/kubeflow/pipelines/tree/0795597562e076437a21745e524b5c960b1edb68/components/_converters) / [OnnxModel](https://github.com/kubeflow/pipelines/tree/0795597562e076437a21745e524b5c960b1edb68/components/_converters/OnnxModel) / [from_TensorflowSavedModel](https://github.com/kubeflow/pipelines/tree/0795597562e076437a21745e524b5c960b1edb68/components/_converters/OnnxModel/from_TensorflowSavedModel) / [To ONNX from Tensorflow SavedModel](https://raw.githubusercontent.com/kubeflow/pipelines/0795597562e076437a21745e524b5c960b1edb68/components/_converters/OnnxModel/from_TensorflowSavedModel/component.yaml)
/ [_converters](https://github.com/kubeflow/pipelines/tree/0795597562e076437a21745e524b5c960b1edb68/components/_converters) / [TensorflowJSGraphModel](https://github.com/kubeflow/pipelines/tree/0795597562e076437a21745e524b5c960b1edb68/components/_converters/TensorflowJSGraphModel) / [from_KerasModelHdf5](https://github.com/kubeflow/pipelines/tree/0795597562e076437a21745e524b5c960b1edb68/components/_converters/TensorflowJSGraphModel/from_KerasModelHdf5) / [Convert Keras HDF5 model to Tensorflow JS GraphModel](https://raw.githubusercontent.com/kubeflow/pipelines/0795597562e076437a21745e524b5c960b1edb68/components/_converters/TensorflowJSGraphModel/from_KerasModelHdf5/component.yaml)
/ [_converters](https://github.com/kubeflow/pipelines/tree/0795597562e076437a21745e524b5c960b1edb68/components/_converters) / [TensorflowJSGraphModel](https://github.com/kubeflow/pipelines/tree/0795597562e076437a21745e524b5c960b1edb68/components/_converters/TensorflowJSGraphModel) / [from_TensorflowSavedModel](https://github.com/kubeflow/pipelines/tree/0795597562e076437a21745e524b5c960b1edb68/components/_converters/TensorflowJSGraphModel/from_TensorflowSavedModel) / [Convert Tensorflow SavedModel to Tensorflow JS GraphModel](https://raw.githubusercontent.com/kubeflow/pipelines/0795597562e076437a21745e524b5c960b1edb68/components/_converters/TensorflowJSGraphModel/from_TensorflowSavedModel/component.yaml)
/ [_converters](https://github.com/kubeflow/pipelines/tree/0795597562e076437a21745e524b5c960b1edb68/components/_converters) / [TensorflowJSLayersModel](https://github.com/kubeflow/pipelines/tree/0795597562e076437a21745e524b5c960b1edb68/components/_converters/TensorflowJSLayersModel) / [from_KerasModelHdf5](https://github.com/kubeflow/pipelines/tree/0795597562e076437a21745e524b5c960b1edb68/components/_converters/TensorflowJSLayersModel/from_KerasModelHdf5) / [Convert Keras HDF5 model to Tensorflow JS LayersModel](https://raw.githubusercontent.com/kubeflow/pipelines/0795597562e076437a21745e524b5c960b1edb68/components/_converters/TensorflowJSLayersModel/from_KerasModelHdf5/component.yaml)
/ [_converters](https://github.com/kubeflow/pipelines/tree/0795597562e076437a21745e524b5c960b1edb68/components/_converters) / [TensorflowJSLayersModel](https://github.com/kubeflow/pipelines/tree/0795597562e076437a21745e524b5c960b1edb68/components/_converters/TensorflowJSLayersModel) / [from_TensorflowSavedModel](https://github.com/kubeflow/pipelines/tree/0795597562e076437a21745e524b5c960b1edb68/components/_converters/TensorflowJSLayersModel/from_TensorflowSavedModel) / [Convert Keras SavedModel to Tensorflow JS LayersModel](https://raw.githubusercontent.com/kubeflow/pipelines/0795597562e076437a21745e524b5c960b1edb68/components/_converters/TensorflowJSLayersModel/from_TensorflowSavedModel/component.yaml)
/ [_converters](https://github.com/kubeflow/pipelines/tree/0795597562e076437a21745e524b5c960b1edb68/components/_converters) / [TensorflowLiteModel](https://github.com/kubeflow/pipelines/tree/0795597562e076437a21745e524b5c960b1edb68/components/_converters/TensorflowLiteModel) / [from_KerasModelHdf5](https://github.com/kubeflow/pipelines/tree/0795597562e076437a21745e524b5c960b1edb68/components/_converters/TensorflowLiteModel/from_KerasModelHdf5) / [Convert Keras HDF5 model to Tensorflow Lite model](https://raw.githubusercontent.com/kubeflow/pipelines/0795597562e076437a21745e524b5c960b1edb68/components/_converters/TensorflowLiteModel/from_KerasModelHdf5/component.yaml)
/ [_converters](https://github.com/kubeflow/pipelines/tree/0795597562e076437a21745e524b5c960b1edb68/components/_converters) / [TensorflowLiteModel](https://github.com/kubeflow/pipelines/tree/0795597562e076437a21745e524b5c960b1edb68/components/_converters/TensorflowLiteModel) / [from_TensorflowSavedModel](https://github.com/kubeflow/pipelines/tree/0795597562e076437a21745e524b5c960b1edb68/components/_converters/TensorflowLiteModel/from_TensorflowSavedModel) / [Convert Tensorflow SavedModel to Tensorflow Lite model](https://raw.githubusercontent.com/kubeflow/pipelines/0795597562e076437a21745e524b5c960b1edb68/components/_converters/TensorflowLiteModel/from_TensorflowSavedModel/component.yaml)
/ [aws](https://github.com/kubeflow/pipelines/tree/0795597562e076437a21745e524b5c960b1edb68/components/aws) / [athena](https://github.com/kubeflow/pipelines/tree/0795597562e076437a21745e524b5c960b1edb68/components/aws/athena) / [query](https://github.com/kubeflow/pipelines/tree/0795597562e076437a21745e524b5c960b1edb68/components/aws/athena/query) / [Athena Query](https://raw.githubusercontent.com/kubeflow/pipelines/0795597562e076437a21745e524b5c960b1edb68/components/aws/athena/query/component.yaml)
/ [aws](https://github.com/kubeflow/pipelines/tree/0795597562e076437a21745e524b5c960b1edb68/components/aws) / [emr](https://github.com/kubeflow/pipelines/tree/0795597562e076437a21745e524b5c960b1edb68/components/aws/emr) / [create_cluster](https://github.com/kubeflow/pipelines/tree/0795597562e076437a21745e524b5c960b1edb68/components/aws/emr/create_cluster) / [emr_create_cluster](https://raw.githubusercontent.com/kubeflow/pipelines/0795597562e076437a21745e524b5c960b1edb68/components/aws/emr/create_cluster/component.yaml)
/ [aws](https://github.com/kubeflow/pipelines/tree/0795597562e076437a21745e524b5c960b1edb68/components/aws) / [emr](https://github.com/kubeflow/pipelines/tree/0795597562e076437a21745e524b5c960b1edb68/components/aws/emr) / [delete_cluster](https://github.com/kubeflow/pipelines/tree/0795597562e076437a21745e524b5c960b1edb68/components/aws/emr/delete_cluster) / [emr_delete_cluster](https://raw.githubusercontent.com/kubeflow/pipelines/0795597562e076437a21745e524b5c960b1edb68/components/aws/emr/delete_cluster/component.yaml)
/ [aws](https://github.com/kubeflow/pipelines/tree/0795597562e076437a21745e524b5c960b1edb68/components/aws) / [emr](https://github.com/kubeflow/pipelines/tree/0795597562e076437a21745e524b5c960b1edb68/components/aws/emr) / [submit_pyspark_job](https://github.com/kubeflow/pipelines/tree/0795597562e076437a21745e524b5c960b1edb68/components/aws/emr/submit_pyspark_job) / [emr_submit_pyspark_job](https://raw.githubusercontent.com/kubeflow/pipelines/0795597562e076437a21745e524b5c960b1edb68/components/aws/emr/submit_pyspark_job/component.yaml)
/ [aws](https://github.com/kubeflow/pipelines/tree/0795597562e076437a21745e524b5c960b1edb68/components/aws) / [emr](https://github.com/kubeflow/pipelines/tree/0795597562e076437a21745e524b5c960b1edb68/components/aws/emr) / [submit_spark_job](https://github.com/kubeflow/pipelines/tree/0795597562e076437a21745e524b5c960b1edb68/components/aws/emr/submit_spark_job) / [emr_submit_spark_job](https://raw.githubusercontent.com/kubeflow/pipelines/0795597562e076437a21745e524b5c960b1edb68/components/aws/emr/submit_spark_job/component.yaml)
/ [aws](https://github.com/kubeflow/pipelines/tree/079eea369ae468fd64cfb513ff0392a25d895ca9/components/aws) / [sagemaker](https://github.com/kubeflow/pipelines/tree/079eea369ae468fd64cfb513ff0392a25d895ca9/components/aws/sagemaker) / [batch_transform](https://github.com/kubeflow/pipelines/tree/079eea369ae468fd64cfb513ff0392a25d895ca9/components/aws/sagemaker/batch_transform) / [SageMaker - Batch Transformation](https://raw.githubusercontent.com/kubeflow/pipelines/079eea369ae468fd64cfb513ff0392a25d895ca9/components/aws/sagemaker/batch_transform/component.yaml)
/ [aws](https://github.com/kubeflow/pipelines/tree/079eea369ae468fd64cfb513ff0392a25d895ca9/components/aws) / [sagemaker](https://github.com/kubeflow/pipelines/tree/079eea369ae468fd64cfb513ff0392a25d895ca9/components/aws/sagemaker) / [create_simulation_app](https://github.com/kubeflow/pipelines/tree/079eea369ae468fd64cfb513ff0392a25d895ca9/components/aws/sagemaker/create_simulation_app) / [RoboMaker - Create Simulation Application](https://raw.githubusercontent.com/kubeflow/pipelines/079eea369ae468fd64cfb513ff0392a25d895ca9/components/aws/sagemaker/create_simulation_app/component.yaml)
/ [aws](https://github.com/kubeflow/pipelines/tree/079eea369ae468fd64cfb513ff0392a25d895ca9/components/aws) / [sagemaker](https://github.com/kubeflow/pipelines/tree/079eea369ae468fd64cfb513ff0392a25d895ca9/components/aws/sagemaker) / [delete_simulation_app](https://github.com/kubeflow/pipelines/tree/079eea369ae468fd64cfb513ff0392a25d895ca9/components/aws/sagemaker/delete_simulation_app) / [RoboMaker - Delete Simulation Application](https://raw.githubusercontent.com/kubeflow/pipelines/079eea369ae468fd64cfb513ff0392a25d895ca9/components/aws/sagemaker/delete_simulation_app/component.yaml)
/ [aws](https://github.com/kubeflow/pipelines/tree/079eea369ae468fd64cfb513ff0392a25d895ca9/components/aws) / [sagemaker](https://github.com/kubeflow/pipelines/tree/079eea369ae468fd64cfb513ff0392a25d895ca9/components/aws/sagemaker) / [deploy](https://github.com/kubeflow/pipelines/tree/079eea369ae468fd64cfb513ff0392a25d895ca9/components/aws/sagemaker/deploy) / [SageMaker - Deploy Model](https://raw.githubusercontent.com/kubeflow/pipelines/079eea369ae468fd64cfb513ff0392a25d895ca9/components/aws/sagemaker/deploy/component.yaml)
/ [aws](https://github.com/kubeflow/pipelines/tree/079eea369ae468fd64cfb513ff0392a25d895ca9/components/aws) / [sagemaker](https://github.com/kubeflow/pipelines/tree/079eea369ae468fd64cfb513ff0392a25d895ca9/components/aws/sagemaker) / [ground_truth](https://github.com/kubeflow/pipelines/tree/079eea369ae468fd64cfb513ff0392a25d895ca9/components/aws/sagemaker/ground_truth) / [SageMaker - Ground Truth](https://raw.githubusercontent.com/kubeflow/pipelines/079eea369ae468fd64cfb513ff0392a25d895ca9/components/aws/sagemaker/ground_truth/component.yaml)
/ [aws](https://github.com/kubeflow/pipelines/tree/079eea369ae468fd64cfb513ff0392a25d895ca9/components/aws) / [sagemaker](https://github.com/kubeflow/pipelines/tree/079eea369ae468fd64cfb513ff0392a25d895ca9/components/aws/sagemaker) / [hyperparameter_tuning](https://github.com/kubeflow/pipelines/tree/079eea369ae468fd64cfb513ff0392a25d895ca9/components/aws/sagemaker/hyperparameter_tuning) / [SageMaker - Hyperparameter Tuning](https://raw.githubusercontent.com/kubeflow/pipelines/079eea369ae468fd64cfb513ff0392a25d895ca9/components/aws/sagemaker/hyperparameter_tuning/component.yaml)
/ [aws](https://github.com/kubeflow/pipelines/tree/079eea369ae468fd64cfb513ff0392a25d895ca9/components/aws) / [sagemaker](https://github.com/kubeflow/pipelines/tree/079eea369ae468fd64cfb513ff0392a25d895ca9/components/aws/sagemaker) / [model](https://github.com/kubeflow/pipelines/tree/079eea369ae468fd64cfb513ff0392a25d895ca9/components/aws/sagemaker/model) / [SageMaker - Create Model](https://raw.githubusercontent.com/kubeflow/pipelines/079eea369ae468fd64cfb513ff0392a25d895ca9/components/aws/sagemaker/model/component.yaml)
/ [aws](https://github.com/kubeflow/pipelines/tree/079eea369ae468fd64cfb513ff0392a25d895ca9/components/aws) / [sagemaker](https://github.com/kubeflow/pipelines/tree/079eea369ae468fd64cfb513ff0392a25d895ca9/components/aws/sagemaker) / [process](https://github.com/kubeflow/pipelines/tree/079eea369ae468fd64cfb513ff0392a25d895ca9/components/aws/sagemaker/process) / [SageMaker - Processing Job](https://raw.githubusercontent.com/kubeflow/pipelines/079eea369ae468fd64cfb513ff0392a25d895ca9/components/aws/sagemaker/process/component.yaml)
/ [aws](https://github.com/kubeflow/pipelines/tree/079eea369ae468fd64cfb513ff0392a25d895ca9/components/aws) / [sagemaker](https://github.com/kubeflow/pipelines/tree/079eea369ae468fd64cfb513ff0392a25d895ca9/components/aws/sagemaker) / [rlestimator](https://github.com/kubeflow/pipelines/tree/079eea369ae468fd64cfb513ff0392a25d895ca9/components/aws/sagemaker/rlestimator) / [SageMaker - RLEstimator Training Job](https://raw.githubusercontent.com/kubeflow/pipelines/079eea369ae468fd64cfb513ff0392a25d895ca9/components/aws/sagemaker/rlestimator/component.yaml)
/ [aws](https://github.com/kubeflow/pipelines/tree/079eea369ae468fd64cfb513ff0392a25d895ca9/components/aws) / [sagemaker](https://github.com/kubeflow/pipelines/tree/079eea369ae468fd64cfb513ff0392a25d895ca9/components/aws/sagemaker) / [simulation_job](https://github.com/kubeflow/pipelines/tree/079eea369ae468fd64cfb513ff0392a25d895ca9/components/aws/sagemaker/simulation_job) / [RoboMaker - Create Simulation Job](https://raw.githubusercontent.com/kubeflow/pipelines/079eea369ae468fd64cfb513ff0392a25d895ca9/components/aws/sagemaker/simulation_job/component.yaml)
/ [aws](https://github.com/kubeflow/pipelines/tree/079eea369ae468fd64cfb513ff0392a25d895ca9/components/aws) / [sagemaker](https://github.com/kubeflow/pipelines/tree/079eea369ae468fd64cfb513ff0392a25d895ca9/components/aws/sagemaker) / [simulation_job_batch](https://github.com/kubeflow/pipelines/tree/079eea369ae468fd64cfb513ff0392a25d895ca9/components/aws/sagemaker/simulation_job_batch) / [RoboMaker - Create Simulation Job Batch](https://raw.githubusercontent.com/kubeflow/pipelines/079eea369ae468fd64cfb513ff0392a25d895ca9/components/aws/sagemaker/simulation_job_batch/component.yaml)
/ [aws](https://github.com/kubeflow/pipelines/tree/079eea369ae468fd64cfb513ff0392a25d895ca9/components/aws) / [sagemaker](https://github.com/kubeflow/pipelines/tree/079eea369ae468fd64cfb513ff0392a25d895ca9/components/aws/sagemaker) / [train](https://github.com/kubeflow/pipelines/tree/079eea369ae468fd64cfb513ff0392a25d895ca9/components/aws/sagemaker/train) / [SageMaker - Training Job](https://raw.githubusercontent.com/kubeflow/pipelines/079eea369ae468fd64cfb513ff0392a25d895ca9/components/aws/sagemaker/train/component.yaml)
/ [aws](https://github.com/kubeflow/pipelines/tree/079eea369ae468fd64cfb513ff0392a25d895ca9/components/aws) / [sagemaker](https://github.com/kubeflow/pipelines/tree/079eea369ae468fd64cfb513ff0392a25d895ca9/components/aws/sagemaker) / [workteam](https://github.com/kubeflow/pipelines/tree/079eea369ae468fd64cfb513ff0392a25d895ca9/components/aws/sagemaker/workteam) / [SageMaker - Private Workforce](https://raw.githubusercontent.com/kubeflow/pipelines/079eea369ae468fd64cfb513ff0392a25d895ca9/components/aws/sagemaker/workteam/component.yaml)
/ [azure](https://github.com/kubeflow/pipelines/tree/210eb8afaae43000cf46a8aec6c17dd3eda3c08f/components/azure) / [azuredevops](https://github.com/kubeflow/pipelines/tree/210eb8afaae43000cf46a8aec6c17dd3eda3c08f/components/azure/azuredevops) / [queue-pipeline](https://github.com/kubeflow/pipelines/tree/210eb8afaae43000cf46a8aec6c17dd3eda3c08f/components/azure/azuredevops/queue-pipeline) / [Queue Azure Pipeline](https://raw.githubusercontent.com/kubeflow/pipelines/210eb8afaae43000cf46a8aec6c17dd3eda3c08f/components/azure/azuredevops/queue-pipeline/component.yaml)
/ [azure](https://github.com/kubeflow/pipelines/tree/ec721fe94dbcaa054b1057e5503e4f9823fdf2a5/components/azure) / [azureml](https://github.com/kubeflow/pipelines/tree/ec721fe94dbcaa054b1057e5503e4f9823fdf2a5/components/azure/azureml) / [aml-deploy-model](https://github.com/kubeflow/pipelines/tree/ec721fe94dbcaa054b1057e5503e4f9823fdf2a5/components/azure/azureml/aml-deploy-model) / [Azure ML Deploy Model](https://raw.githubusercontent.com/kubeflow/pipelines/ec721fe94dbcaa054b1057e5503e4f9823fdf2a5/components/azure/azureml/aml-deploy-model/component.yaml)
/ [azure](https://github.com/kubeflow/pipelines/tree/210eb8afaae43000cf46a8aec6c17dd3eda3c08f/components/azure) / [azureml](https://github.com/kubeflow/pipelines/tree/210eb8afaae43000cf46a8aec6c17dd3eda3c08f/components/azure/azureml) / [aml-register-model](https://github.com/kubeflow/pipelines/tree/210eb8afaae43000cf46a8aec6c17dd3eda3c08f/components/azure/azureml/aml-register-model) / [Azure ML Register Model](https://raw.githubusercontent.com/kubeflow/pipelines/210eb8afaae43000cf46a8aec6c17dd3eda3c08f/components/azure/azureml/aml-register-model/component.yaml)
/ [azure](https://github.com/kubeflow/pipelines/tree/e976d6d4696262e319ae971ffa645297cf258d80/components/azure) / [azuresynapse](https://github.com/kubeflow/pipelines/tree/e976d6d4696262e319ae971ffa645297cf258d80/components/azure/azuresynapse) / [runsparkjob](https://github.com/kubeflow/pipelines/tree/e976d6d4696262e319ae971ffa645297cf258d80/components/azure/azuresynapse/runsparkjob) / [Azure Synapse Run Spark Job](https://raw.githubusercontent.com/kubeflow/pipelines/e976d6d4696262e319ae971ffa645297cf258d80/components/azure/azuresynapse/runsparkjob/component.yaml)
/ [basics](https://github.com/kubeflow/pipelines/tree/0795597562e076437a21745e524b5c960b1edb68/components/basics) / [Calculate_hash](https://github.com/kubeflow/pipelines/tree/0795597562e076437a21745e524b5c960b1edb68/components/basics/Calculate_hash) / [Calculate data hash](https://raw.githubusercontent.com/kubeflow/pipelines/0795597562e076437a21745e524b5c960b1edb68/components/basics/Calculate_hash/component.yaml)
/ [CatBoost](https://github.com/kubeflow/pipelines/tree/0795597562e076437a21745e524b5c960b1edb68/components/CatBoost) / [convert_CatBoostModel_to_AppleCoreMLModel](https://github.com/kubeflow/pipelines/tree/0795597562e076437a21745e524b5c960b1edb68/components/CatBoost/convert_CatBoostModel_to_AppleCoreMLModel) / [Convert CatBoostModel to AppleCoreMLModel](https://raw.githubusercontent.com/kubeflow/pipelines/0795597562e076437a21745e524b5c960b1edb68/components/CatBoost/convert_CatBoostModel_to_AppleCoreMLModel/component.yaml)
/ [CatBoost](https://github.com/kubeflow/pipelines/tree/0795597562e076437a21745e524b5c960b1edb68/components/CatBoost) / [convert_CatBoostModel_to_ONNX](https://github.com/kubeflow/pipelines/tree/0795597562e076437a21745e524b5c960b1edb68/components/CatBoost/convert_CatBoostModel_to_ONNX) / [Convert CatBoostModel to ONNX](https://raw.githubusercontent.com/kubeflow/pipelines/0795597562e076437a21745e524b5c960b1edb68/components/CatBoost/convert_CatBoostModel_to_ONNX/component.yaml)
/ [CatBoost](https://github.com/kubeflow/pipelines/tree/0795597562e076437a21745e524b5c960b1edb68/components/CatBoost) / [Predict_class_probabilities](https://github.com/kubeflow/pipelines/tree/0795597562e076437a21745e524b5c960b1edb68/components/CatBoost/Predict_class_probabilities) / [from_CSV](https://github.com/kubeflow/pipelines/tree/0795597562e076437a21745e524b5c960b1edb68/components/CatBoost/Predict_class_probabilities/from_CSV) / [Catboost predict class probabilities](https://raw.githubusercontent.com/kubeflow/pipelines/0795597562e076437a21745e524b5c960b1edb68/components/CatBoost/Predict_class_probabilities/from_CSV/component.yaml)
/ [CatBoost](https://github.com/kubeflow/pipelines/tree/0795597562e076437a21745e524b5c960b1edb68/components/CatBoost) / [Predict_classes](https://github.com/kubeflow/pipelines/tree/0795597562e076437a21745e524b5c960b1edb68/components/CatBoost/Predict_classes) / [from_CSV](https://github.com/kubeflow/pipelines/tree/0795597562e076437a21745e524b5c960b1edb68/components/CatBoost/Predict_classes/from_CSV) / [Catboost predict classes](https://raw.githubusercontent.com/kubeflow/pipelines/0795597562e076437a21745e524b5c960b1edb68/components/CatBoost/Predict_classes/from_CSV/component.yaml)
/ [CatBoost](https://github.com/kubeflow/pipelines/tree/0795597562e076437a21745e524b5c960b1edb68/components/CatBoost) / [Predict_values](https://github.com/kubeflow/pipelines/tree/0795597562e076437a21745e524b5c960b1edb68/components/CatBoost/Predict_values) / [from_CSV](https://github.com/kubeflow/pipelines/tree/0795597562e076437a21745e524b5c960b1edb68/components/CatBoost/Predict_values/from_CSV) / [Catboost predict values](https://raw.githubusercontent.com/kubeflow/pipelines/0795597562e076437a21745e524b5c960b1edb68/components/CatBoost/Predict_values/from_CSV/component.yaml)
/ [CatBoost](https://github.com/kubeflow/pipelines/tree/0795597562e076437a21745e524b5c960b1edb68/components/CatBoost) / [Train_classifier](https://github.com/kubeflow/pipelines/tree/0795597562e076437a21745e524b5c960b1edb68/components/CatBoost/Train_classifier) / [from_CSV](https://github.com/kubeflow/pipelines/tree/0795597562e076437a21745e524b5c960b1edb68/components/CatBoost/Train_classifier/from_CSV) / [Catboost train classifier](https://raw.githubusercontent.com/kubeflow/pipelines/0795597562e076437a21745e524b5c960b1edb68/components/CatBoost/Train_classifier/from_CSV/component.yaml)
/ [CatBoost](https://github.com/kubeflow/pipelines/tree/0795597562e076437a21745e524b5c960b1edb68/components/CatBoost) / [Train_regression](https://github.com/kubeflow/pipelines/tree/0795597562e076437a21745e524b5c960b1edb68/components/CatBoost/Train_regression) / [from_CSV](https://github.com/kubeflow/pipelines/tree/0795597562e076437a21745e524b5c960b1edb68/components/CatBoost/Train_regression/from_CSV) / [Catboost train regression](https://raw.githubusercontent.com/kubeflow/pipelines/0795597562e076437a21745e524b5c960b1edb68/components/CatBoost/Train_regression/from_CSV/component.yaml)
/ [dataset_manipulation](https://github.com/kubeflow/pipelines/tree/0795597562e076437a21745e524b5c960b1edb68/components/dataset_manipulation) / [split_data_into_folds](https://github.com/kubeflow/pipelines/tree/0795597562e076437a21745e524b5c960b1edb68/components/dataset_manipulation/split_data_into_folds) / [in_CSV](https://github.com/kubeflow/pipelines/tree/0795597562e076437a21745e524b5c960b1edb68/components/dataset_manipulation/split_data_into_folds/in_CSV) / [Split table into folds](https://raw.githubusercontent.com/kubeflow/pipelines/0795597562e076437a21745e524b5c960b1edb68/components/dataset_manipulation/split_data_into_folds/in_CSV/component.yaml)
/ [datasets](https://github.com/kubeflow/pipelines/tree/0795597562e076437a21745e524b5c960b1edb68/components/datasets) / [Chicago_Taxi_Trips](https://github.com/kubeflow/pipelines/tree/0795597562e076437a21745e524b5c960b1edb68/components/datasets/Chicago_Taxi_Trips) / [Chicago Taxi Trips dataset](https://raw.githubusercontent.com/kubeflow/pipelines/0795597562e076437a21745e524b5c960b1edb68/components/datasets/Chicago_Taxi_Trips/component.yaml)
/ [deprecated](https://github.com/kubeflow/pipelines/tree/0795597562e076437a21745e524b5c960b1edb68/components/deprecated) / [dataflow](https://github.com/kubeflow/pipelines/tree/0795597562e076437a21745e524b5c960b1edb68/components/deprecated/dataflow) / [predict](https://github.com/kubeflow/pipelines/tree/0795597562e076437a21745e524b5c960b1edb68/components/deprecated/dataflow/predict) / [Predict using TF on Dataflow](https://raw.githubusercontent.com/kubeflow/pipelines/0795597562e076437a21745e524b5c960b1edb68/components/deprecated/dataflow/predict/component.yaml)
/ [deprecated](https://github.com/kubeflow/pipelines/tree/0795597562e076437a21745e524b5c960b1edb68/components/deprecated) / [dataflow](https://github.com/kubeflow/pipelines/tree/0795597562e076437a21745e524b5c960b1edb68/components/deprecated/dataflow) / [tfdv](https://github.com/kubeflow/pipelines/tree/0795597562e076437a21745e524b5c960b1edb68/components/deprecated/dataflow/tfdv) / [TFX - Data Validation](https://raw.githubusercontent.com/kubeflow/pipelines/0795597562e076437a21745e524b5c960b1edb68/components/deprecated/dataflow/tfdv/component.yaml)
/ [deprecated](https://github.com/kubeflow/pipelines/tree/0795597562e076437a21745e524b5c960b1edb68/components/deprecated) / [dataflow](https://github.com/kubeflow/pipelines/tree/0795597562e076437a21745e524b5c960b1edb68/components/deprecated/dataflow) / [tfma](https://github.com/kubeflow/pipelines/tree/0795597562e076437a21745e524b5c960b1edb68/components/deprecated/dataflow/tfma) / [TFX - Analyze model](https://raw.githubusercontent.com/kubeflow/pipelines/0795597562e076437a21745e524b5c960b1edb68/components/deprecated/dataflow/tfma/component.yaml)
/ [deprecated](https://github.com/kubeflow/pipelines/tree/0795597562e076437a21745e524b5c960b1edb68/components/deprecated) / [dataflow](https://github.com/kubeflow/pipelines/tree/0795597562e076437a21745e524b5c960b1edb68/components/deprecated/dataflow) / [tft](https://github.com/kubeflow/pipelines/tree/0795597562e076437a21745e524b5c960b1edb68/components/deprecated/dataflow/tft) / [Transform using TF on Dataflow](https://raw.githubusercontent.com/kubeflow/pipelines/0795597562e076437a21745e524b5c960b1edb68/components/deprecated/dataflow/tft/component.yaml)
/ [deprecated](https://github.com/kubeflow/pipelines/tree/0795597562e076437a21745e524b5c960b1edb68/components/deprecated) / [tfx](https://github.com/kubeflow/pipelines/tree/0795597562e076437a21745e524b5c960b1edb68/components/deprecated/tfx) / [Evaluator](https://github.com/kubeflow/pipelines/tree/0795597562e076437a21745e524b5c960b1edb68/components/deprecated/tfx/Evaluator) / [Evaluator](https://raw.githubusercontent.com/kubeflow/pipelines/0795597562e076437a21745e524b5c960b1edb68/components/deprecated/tfx/Evaluator/component.yaml)
/ [deprecated](https://github.com/kubeflow/pipelines/tree/0795597562e076437a21745e524b5c960b1edb68/components/deprecated) / [tfx](https://github.com/kubeflow/pipelines/tree/0795597562e076437a21745e524b5c960b1edb68/components/deprecated/tfx) / [Evaluator](https://github.com/kubeflow/pipelines/tree/0795597562e076437a21745e524b5c960b1edb68/components/deprecated/tfx/Evaluator) / [with_URI_IO](https://github.com/kubeflow/pipelines/tree/0795597562e076437a21745e524b5c960b1edb68/components/deprecated/tfx/Evaluator/with_URI_IO) / [Evaluator](https://raw.githubusercontent.com/kubeflow/pipelines/0795597562e076437a21745e524b5c960b1edb68/components/deprecated/tfx/Evaluator/with_URI_IO/component.yaml)
/ [deprecated](https://github.com/kubeflow/pipelines/tree/0795597562e076437a21745e524b5c960b1edb68/components/deprecated) / [tfx](https://github.com/kubeflow/pipelines/tree/0795597562e076437a21745e524b5c960b1edb68/components/deprecated/tfx) / [ExampleGen](https://github.com/kubeflow/pipelines/tree/0795597562e076437a21745e524b5c960b1edb68/components/deprecated/tfx/ExampleGen) / [BigQueryExampleGen](https://github.com/kubeflow/pipelines/tree/0795597562e076437a21745e524b5c960b1edb68/components/deprecated/tfx/ExampleGen/BigQueryExampleGen) / [BigQueryExampleGen](https://raw.githubusercontent.com/kubeflow/pipelines/0795597562e076437a21745e524b5c960b1edb68/components/deprecated/tfx/ExampleGen/BigQueryExampleGen/component.yaml)
/ [deprecated](https://github.com/kubeflow/pipelines/tree/0795597562e076437a21745e524b5c960b1edb68/components/deprecated) / [tfx](https://github.com/kubeflow/pipelines/tree/0795597562e076437a21745e524b5c960b1edb68/components/deprecated/tfx) / [ExampleGen](https://github.com/kubeflow/pipelines/tree/0795597562e076437a21745e524b5c960b1edb68/components/deprecated/tfx/ExampleGen) / [BigQueryExampleGen](https://github.com/kubeflow/pipelines/tree/0795597562e076437a21745e524b5c960b1edb68/components/deprecated/tfx/ExampleGen/BigQueryExampleGen) / [with_URI_IO](https://github.com/kubeflow/pipelines/tree/0795597562e076437a21745e524b5c960b1edb68/components/deprecated/tfx/ExampleGen/BigQueryExampleGen/with_URI_IO) / [BigQueryExampleGen](https://raw.githubusercontent.com/kubeflow/pipelines/0795597562e076437a21745e524b5c960b1edb68/components/deprecated/tfx/ExampleGen/BigQueryExampleGen/with_URI_IO/component.yaml)
/ [deprecated](https://github.com/kubeflow/pipelines/tree/0795597562e076437a21745e524b5c960b1edb68/components/deprecated) / [tfx](https://github.com/kubeflow/pipelines/tree/0795597562e076437a21745e524b5c960b1edb68/components/deprecated/tfx) / [ExampleGen](https://github.com/kubeflow/pipelines/tree/0795597562e076437a21745e524b5c960b1edb68/components/deprecated/tfx/ExampleGen) / [CsvExampleGen](https://github.com/kubeflow/pipelines/tree/0795597562e076437a21745e524b5c960b1edb68/components/deprecated/tfx/ExampleGen/CsvExampleGen) / [CsvExampleGen](https://raw.githubusercontent.com/kubeflow/pipelines/0795597562e076437a21745e524b5c960b1edb68/components/deprecated/tfx/ExampleGen/CsvExampleGen/component.yaml)
/ [deprecated](https://github.com/kubeflow/pipelines/tree/0795597562e076437a21745e524b5c960b1edb68/components/deprecated) / [tfx](https://github.com/kubeflow/pipelines/tree/0795597562e076437a21745e524b5c960b1edb68/components/deprecated/tfx) / [ExampleGen](https://github.com/kubeflow/pipelines/tree/0795597562e076437a21745e524b5c960b1edb68/components/deprecated/tfx/ExampleGen) / [CsvExampleGen](https://github.com/kubeflow/pipelines/tree/0795597562e076437a21745e524b5c960b1edb68/components/deprecated/tfx/ExampleGen/CsvExampleGen) / [with_URI_IO](https://github.com/kubeflow/pipelines/tree/0795597562e076437a21745e524b5c960b1edb68/components/deprecated/tfx/ExampleGen/CsvExampleGen/with_URI_IO) / [CsvExampleGen](https://raw.githubusercontent.com/kubeflow/pipelines/0795597562e076437a21745e524b5c960b1edb68/components/deprecated/tfx/ExampleGen/CsvExampleGen/with_URI_IO/component.yaml)
/ [deprecated](https://github.com/kubeflow/pipelines/tree/0795597562e076437a21745e524b5c960b1edb68/components/deprecated) / [tfx](https://github.com/kubeflow/pipelines/tree/0795597562e076437a21745e524b5c960b1edb68/components/deprecated/tfx) / [ExampleGen](https://github.com/kubeflow/pipelines/tree/0795597562e076437a21745e524b5c960b1edb68/components/deprecated/tfx/ExampleGen) / [ImportExampleGen](https://github.com/kubeflow/pipelines/tree/0795597562e076437a21745e524b5c960b1edb68/components/deprecated/tfx/ExampleGen/ImportExampleGen) / [ImportExampleGen](https://raw.githubusercontent.com/kubeflow/pipelines/0795597562e076437a21745e524b5c960b1edb68/components/deprecated/tfx/ExampleGen/ImportExampleGen/component.yaml)
/ [deprecated](https://github.com/kubeflow/pipelines/tree/0795597562e076437a21745e524b5c960b1edb68/components/deprecated) / [tfx](https://github.com/kubeflow/pipelines/tree/0795597562e076437a21745e524b5c960b1edb68/components/deprecated/tfx) / [ExampleGen](https://github.com/kubeflow/pipelines/tree/0795597562e076437a21745e524b5c960b1edb68/components/deprecated/tfx/ExampleGen) / [ImportExampleGen](https://github.com/kubeflow/pipelines/tree/0795597562e076437a21745e524b5c960b1edb68/components/deprecated/tfx/ExampleGen/ImportExampleGen) / [with_URI_IO](https://github.com/kubeflow/pipelines/tree/0795597562e076437a21745e524b5c960b1edb68/components/deprecated/tfx/ExampleGen/ImportExampleGen/with_URI_IO) / [ImportExampleGen](https://raw.githubusercontent.com/kubeflow/pipelines/0795597562e076437a21745e524b5c960b1edb68/components/deprecated/tfx/ExampleGen/ImportExampleGen/with_URI_IO/component.yaml)
/ [deprecated](https://github.com/kubeflow/pipelines/tree/0795597562e076437a21745e524b5c960b1edb68/components/deprecated) / [tfx](https://github.com/kubeflow/pipelines/tree/0795597562e076437a21745e524b5c960b1edb68/components/deprecated/tfx) / [ExampleValidator](https://github.com/kubeflow/pipelines/tree/0795597562e076437a21745e524b5c960b1edb68/components/deprecated/tfx/ExampleValidator) / [ExampleValidator](https://raw.githubusercontent.com/kubeflow/pipelines/0795597562e076437a21745e524b5c960b1edb68/components/deprecated/tfx/ExampleValidator/component.yaml)
/ [deprecated](https://github.com/kubeflow/pipelines/tree/0795597562e076437a21745e524b5c960b1edb68/components/deprecated) / [tfx](https://github.com/kubeflow/pipelines/tree/0795597562e076437a21745e524b5c960b1edb68/components/deprecated/tfx) / [ExampleValidator](https://github.com/kubeflow/pipelines/tree/0795597562e076437a21745e524b5c960b1edb68/components/deprecated/tfx/ExampleValidator) / [with_URI_IO](https://github.com/kubeflow/pipelines/tree/0795597562e076437a21745e524b5c960b1edb68/components/deprecated/tfx/ExampleValidator/with_URI_IO) / [ExampleValidator](https://raw.githubusercontent.com/kubeflow/pipelines/0795597562e076437a21745e524b5c960b1edb68/components/deprecated/tfx/ExampleValidator/with_URI_IO/component.yaml)
/ [deprecated](https://github.com/kubeflow/pipelines/tree/0795597562e076437a21745e524b5c960b1edb68/components/deprecated) / [tfx](https://github.com/kubeflow/pipelines/tree/0795597562e076437a21745e524b5c960b1edb68/components/deprecated/tfx) / [SchemaGen](https://github.com/kubeflow/pipelines/tree/0795597562e076437a21745e524b5c960b1edb68/components/deprecated/tfx/SchemaGen) / [SchemaGen](https://raw.githubusercontent.com/kubeflow/pipelines/0795597562e076437a21745e524b5c960b1edb68/components/deprecated/tfx/SchemaGen/component.yaml)
/ [deprecated](https://github.com/kubeflow/pipelines/tree/0795597562e076437a21745e524b5c960b1edb68/components/deprecated) / [tfx](https://github.com/kubeflow/pipelines/tree/0795597562e076437a21745e524b5c960b1edb68/components/deprecated/tfx) / [SchemaGen](https://github.com/kubeflow/pipelines/tree/0795597562e076437a21745e524b5c960b1edb68/components/deprecated/tfx/SchemaGen) / [with_URI_IO](https://github.com/kubeflow/pipelines/tree/0795597562e076437a21745e524b5c960b1edb68/components/deprecated/tfx/SchemaGen/with_URI_IO) / [SchemaGen](https://raw.githubusercontent.com/kubeflow/pipelines/0795597562e076437a21745e524b5c960b1edb68/components/deprecated/tfx/SchemaGen/with_URI_IO/component.yaml)
/ [deprecated](https://github.com/kubeflow/pipelines/tree/0795597562e076437a21745e524b5c960b1edb68/components/deprecated) / [tfx](https://github.com/kubeflow/pipelines/tree/0795597562e076437a21745e524b5c960b1edb68/components/deprecated/tfx) / [StatisticsGen](https://github.com/kubeflow/pipelines/tree/0795597562e076437a21745e524b5c960b1edb68/components/deprecated/tfx/StatisticsGen) / [StatisticsGen](https://raw.githubusercontent.com/kubeflow/pipelines/0795597562e076437a21745e524b5c960b1edb68/components/deprecated/tfx/StatisticsGen/component.yaml)
/ [deprecated](https://github.com/kubeflow/pipelines/tree/0795597562e076437a21745e524b5c960b1edb68/components/deprecated) / [tfx](https://github.com/kubeflow/pipelines/tree/0795597562e076437a21745e524b5c960b1edb68/components/deprecated/tfx) / [StatisticsGen](https://github.com/kubeflow/pipelines/tree/0795597562e076437a21745e524b5c960b1edb68/components/deprecated/tfx/StatisticsGen) / [with_URI_IO](https://github.com/kubeflow/pipelines/tree/0795597562e076437a21745e524b5c960b1edb68/components/deprecated/tfx/StatisticsGen/with_URI_IO) / [StatisticsGen](https://raw.githubusercontent.com/kubeflow/pipelines/0795597562e076437a21745e524b5c960b1edb68/components/deprecated/tfx/StatisticsGen/with_URI_IO/component.yaml)
/ [deprecated](https://github.com/kubeflow/pipelines/tree/0795597562e076437a21745e524b5c960b1edb68/components/deprecated) / [tfx](https://github.com/kubeflow/pipelines/tree/0795597562e076437a21745e524b5c960b1edb68/components/deprecated/tfx) / [Trainer](https://github.com/kubeflow/pipelines/tree/0795597562e076437a21745e524b5c960b1edb68/components/deprecated/tfx/Trainer) / [Trainer](https://raw.githubusercontent.com/kubeflow/pipelines/0795597562e076437a21745e524b5c960b1edb68/components/deprecated/tfx/Trainer/component.yaml)
/ [deprecated](https://github.com/kubeflow/pipelines/tree/0795597562e076437a21745e524b5c960b1edb68/components/deprecated) / [tfx](https://github.com/kubeflow/pipelines/tree/0795597562e076437a21745e524b5c960b1edb68/components/deprecated/tfx) / [Trainer](https://github.com/kubeflow/pipelines/tree/0795597562e076437a21745e524b5c960b1edb68/components/deprecated/tfx/Trainer) / [with_URI_IO](https://github.com/kubeflow/pipelines/tree/0795597562e076437a21745e524b5c960b1edb68/components/deprecated/tfx/Trainer/with_URI_IO) / [Trainer](https://raw.githubusercontent.com/kubeflow/pipelines/0795597562e076437a21745e524b5c960b1edb68/components/deprecated/tfx/Trainer/with_URI_IO/component.yaml)
/ [deprecated](https://github.com/kubeflow/pipelines/tree/0795597562e076437a21745e524b5c960b1edb68/components/deprecated) / [tfx](https://github.com/kubeflow/pipelines/tree/0795597562e076437a21745e524b5c960b1edb68/components/deprecated/tfx) / [Transform](https://github.com/kubeflow/pipelines/tree/0795597562e076437a21745e524b5c960b1edb68/components/deprecated/tfx/Transform) / [Transform](https://raw.githubusercontent.com/kubeflow/pipelines/0795597562e076437a21745e524b5c960b1edb68/components/deprecated/tfx/Transform/component.yaml)
/ [deprecated](https://github.com/kubeflow/pipelines/tree/0795597562e076437a21745e524b5c960b1edb68/components/deprecated) / [tfx](https://github.com/kubeflow/pipelines/tree/0795597562e076437a21745e524b5c960b1edb68/components/deprecated/tfx) / [Transform](https://github.com/kubeflow/pipelines/tree/0795597562e076437a21745e524b5c960b1edb68/components/deprecated/tfx/Transform) / [with_URI_IO](https://github.com/kubeflow/pipelines/tree/0795597562e076437a21745e524b5c960b1edb68/components/deprecated/tfx/Transform/with_URI_IO) / [Transform](https://raw.githubusercontent.com/kubeflow/pipelines/0795597562e076437a21745e524b5c960b1edb68/components/deprecated/tfx/Transform/with_URI_IO/component.yaml)
/ [diagnostics](https://github.com/kubeflow/pipelines/tree/0795597562e076437a21745e524b5c960b1edb68/components/diagnostics) / [diagnose_me](https://github.com/kubeflow/pipelines/tree/0795597562e076437a21745e524b5c960b1edb68/components/diagnostics/diagnose_me) / [Run diagnose me](https://raw.githubusercontent.com/kubeflow/pipelines/0795597562e076437a21745e524b5c960b1edb68/components/diagnostics/diagnose_me/component.yaml)
/ [filesystem](https://github.com/kubeflow/pipelines/tree/0795597562e076437a21745e524b5c960b1edb68/components/filesystem) / [get_file](https://github.com/kubeflow/pipelines/tree/0795597562e076437a21745e524b5c960b1edb68/components/filesystem/get_file) / [Get file](https://raw.githubusercontent.com/kubeflow/pipelines/0795597562e076437a21745e524b5c960b1edb68/components/filesystem/get_file/component.yaml)
/ [filesystem](https://github.com/kubeflow/pipelines/tree/0795597562e076437a21745e524b5c960b1edb68/components/filesystem) / [get_subdirectory](https://github.com/kubeflow/pipelines/tree/0795597562e076437a21745e524b5c960b1edb68/components/filesystem/get_subdirectory) / [Get subdirectory](https://raw.githubusercontent.com/kubeflow/pipelines/0795597562e076437a21745e524b5c960b1edb68/components/filesystem/get_subdirectory/component.yaml)
/ [filesystem](https://github.com/kubeflow/pipelines/tree/0795597562e076437a21745e524b5c960b1edb68/components/filesystem) / [list_items](https://github.com/kubeflow/pipelines/tree/0795597562e076437a21745e524b5c960b1edb68/components/filesystem/list_items) / [List items](https://raw.githubusercontent.com/kubeflow/pipelines/0795597562e076437a21745e524b5c960b1edb68/components/filesystem/list_items/component.yaml)
/ [gcp](https://github.com/kubeflow/pipelines/tree/c653c300508eea2da13e8eb464fd43028171cc4a/components/gcp) / [automl](https://github.com/kubeflow/pipelines/tree/c653c300508eea2da13e8eb464fd43028171cc4a/components/gcp/automl) / [create_dataset_for_tables](https://github.com/kubeflow/pipelines/tree/c653c300508eea2da13e8eb464fd43028171cc4a/components/gcp/automl/create_dataset_for_tables) / [Automl create dataset for tables](https://raw.githubusercontent.com/kubeflow/pipelines/c653c300508eea2da13e8eb464fd43028171cc4a/components/gcp/automl/create_dataset_for_tables/component.yaml)
/ [gcp](https://github.com/kubeflow/pipelines/tree/0795597562e076437a21745e524b5c960b1edb68/components/gcp) / [automl](https://github.com/kubeflow/pipelines/tree/0795597562e076437a21745e524b5c960b1edb68/components/gcp/automl) / [create_model_for_tables](https://github.com/kubeflow/pipelines/tree/0795597562e076437a21745e524b5c960b1edb68/components/gcp/automl/create_model_for_tables) / [Automl create model for tables](https://raw.githubusercontent.com/kubeflow/pipelines/0795597562e076437a21745e524b5c960b1edb68/components/gcp/automl/create_model_for_tables/component.yaml)
/ [gcp](https://github.com/kubeflow/pipelines/tree/0795597562e076437a21745e524b5c960b1edb68/components/gcp) / [automl](https://github.com/kubeflow/pipelines/tree/0795597562e076437a21745e524b5c960b1edb68/components/gcp/automl) / [deploy_model](https://github.com/kubeflow/pipelines/tree/0795597562e076437a21745e524b5c960b1edb68/components/gcp/automl/deploy_model) / [Automl deploy model](https://raw.githubusercontent.com/kubeflow/pipelines/0795597562e076437a21745e524b5c960b1edb68/components/gcp/automl/deploy_model/component.yaml)
/ [gcp](https://github.com/kubeflow/pipelines/tree/0795597562e076437a21745e524b5c960b1edb68/components/gcp) / [automl](https://github.com/kubeflow/pipelines/tree/0795597562e076437a21745e524b5c960b1edb68/components/gcp/automl) / [export_data_to_gcs](https://github.com/kubeflow/pipelines/tree/0795597562e076437a21745e524b5c960b1edb68/components/gcp/automl/export_data_to_gcs) / [Automl export data to gcs](https://raw.githubusercontent.com/kubeflow/pipelines/0795597562e076437a21745e524b5c960b1edb68/components/gcp/automl/export_data_to_gcs/component.yaml)
/ [gcp](https://github.com/kubeflow/pipelines/tree/0795597562e076437a21745e524b5c960b1edb68/components/gcp) / [automl](https://github.com/kubeflow/pipelines/tree/0795597562e076437a21745e524b5c960b1edb68/components/gcp/automl) / [export_model_to_gcs](https://github.com/kubeflow/pipelines/tree/0795597562e076437a21745e524b5c960b1edb68/components/gcp/automl/export_model_to_gcs) / [Automl export model to gcs](https://raw.githubusercontent.com/kubeflow/pipelines/0795597562e076437a21745e524b5c960b1edb68/components/gcp/automl/export_model_to_gcs/component.yaml)
/ [gcp](https://github.com/kubeflow/pipelines/tree/0795597562e076437a21745e524b5c960b1edb68/components/gcp) / [automl](https://github.com/kubeflow/pipelines/tree/0795597562e076437a21745e524b5c960b1edb68/components/gcp/automl) / [import_data_from_bigquery](https://github.com/kubeflow/pipelines/tree/0795597562e076437a21745e524b5c960b1edb68/components/gcp/automl/import_data_from_bigquery) / [Automl import data from bigquery](https://raw.githubusercontent.com/kubeflow/pipelines/0795597562e076437a21745e524b5c960b1edb68/components/gcp/automl/import_data_from_bigquery/component.yaml)
/ [gcp](https://github.com/kubeflow/pipelines/tree/0795597562e076437a21745e524b5c960b1edb68/components/gcp) / [automl](https://github.com/kubeflow/pipelines/tree/0795597562e076437a21745e524b5c960b1edb68/components/gcp/automl) / [import_data_from_gcs](https://github.com/kubeflow/pipelines/tree/0795597562e076437a21745e524b5c960b1edb68/components/gcp/automl/import_data_from_gcs) / [Automl import data from gcs](https://raw.githubusercontent.com/kubeflow/pipelines/0795597562e076437a21745e524b5c960b1edb68/components/gcp/automl/import_data_from_gcs/component.yaml)
/ [gcp](https://github.com/kubeflow/pipelines/tree/0795597562e076437a21745e524b5c960b1edb68/components/gcp) / [automl](https://github.com/kubeflow/pipelines/tree/0795597562e076437a21745e524b5c960b1edb68/components/gcp/automl) / [prediction_service_batch_predict](https://github.com/kubeflow/pipelines/tree/0795597562e076437a21745e524b5c960b1edb68/components/gcp/automl/prediction_service_batch_predict) / [Automl prediction service batch predict](https://raw.githubusercontent.com/kubeflow/pipelines/0795597562e076437a21745e524b5c960b1edb68/components/gcp/automl/prediction_service_batch_predict/component.yaml)
/ [gcp](https://github.com/kubeflow/pipelines/tree/0795597562e076437a21745e524b5c960b1edb68/components/gcp) / [automl](https://github.com/kubeflow/pipelines/tree/0795597562e076437a21745e524b5c960b1edb68/components/gcp/automl) / [split_dataset_table_column_names](https://github.com/kubeflow/pipelines/tree/0795597562e076437a21745e524b5c960b1edb68/components/gcp/automl/split_dataset_table_column_names) / [Automl split dataset table column names](https://raw.githubusercontent.com/kubeflow/pipelines/0795597562e076437a21745e524b5c960b1edb68/components/gcp/automl/split_dataset_table_column_names/component.yaml)
/ [gcp](https://github.com/kubeflow/pipelines/tree/8b3d741c6ef9f80190c962d4640690ea723b71e9/components/gcp) / [bigquery](https://github.com/kubeflow/pipelines/tree/8b3d741c6ef9f80190c962d4640690ea723b71e9/components/gcp/bigquery) / [query](https://github.com/kubeflow/pipelines/tree/8b3d741c6ef9f80190c962d4640690ea723b71e9/components/gcp/bigquery/query) / [to_CSV](https://github.com/kubeflow/pipelines/tree/8b3d741c6ef9f80190c962d4640690ea723b71e9/components/gcp/bigquery/query/to_CSV) / [Bigquery - Query](https://raw.githubusercontent.com/kubeflow/pipelines/1.7.0-rc.3/components/gcp/bigquery/query/to_CSV/component.yaml)
/ [gcp](https://github.com/kubeflow/pipelines/tree/8b3d741c6ef9f80190c962d4640690ea723b71e9/components/gcp) / [bigquery](https://github.com/kubeflow/pipelines/tree/8b3d741c6ef9f80190c962d4640690ea723b71e9/components/gcp/bigquery) / [query](https://github.com/kubeflow/pipelines/tree/8b3d741c6ef9f80190c962d4640690ea723b71e9/components/gcp/bigquery/query) / [to_gcs](https://github.com/kubeflow/pipelines/tree/8b3d741c6ef9f80190c962d4640690ea723b71e9/components/gcp/bigquery/query/to_gcs) / [Bigquery - Query](https://raw.githubusercontent.com/kubeflow/pipelines/1.7.0-rc.3/components/gcp/bigquery/query/to_gcs/component.yaml)
/ [gcp](https://github.com/kubeflow/pipelines/tree/8b3d741c6ef9f80190c962d4640690ea723b71e9/components/gcp) / [bigquery](https://github.com/kubeflow/pipelines/tree/8b3d741c6ef9f80190c962d4640690ea723b71e9/components/gcp/bigquery) / [query](https://github.com/kubeflow/pipelines/tree/8b3d741c6ef9f80190c962d4640690ea723b71e9/components/gcp/bigquery/query) / [to_table](https://github.com/kubeflow/pipelines/tree/8b3d741c6ef9f80190c962d4640690ea723b71e9/components/gcp/bigquery/query/to_table) / [Bigquery - Query](https://raw.githubusercontent.com/kubeflow/pipelines/1.7.0-rc.3/components/gcp/bigquery/query/to_table/component.yaml)
/ [gcp](https://github.com/kubeflow/pipelines/tree/8b3d741c6ef9f80190c962d4640690ea723b71e9/components/gcp) / [dataflow](https://github.com/kubeflow/pipelines/tree/8b3d741c6ef9f80190c962d4640690ea723b71e9/components/gcp/dataflow) / [launch_python](https://github.com/kubeflow/pipelines/tree/8b3d741c6ef9f80190c962d4640690ea723b71e9/components/gcp/dataflow/launch_python) / [Launch Python](https://raw.githubusercontent.com/kubeflow/pipelines/1.7.0-rc.3/components/gcp/dataflow/launch_python/component.yaml)
/ [gcp](https://github.com/kubeflow/pipelines/tree/8b3d741c6ef9f80190c962d4640690ea723b71e9/components/gcp) / [dataflow](https://github.com/kubeflow/pipelines/tree/8b3d741c6ef9f80190c962d4640690ea723b71e9/components/gcp/dataflow) / [launch_template](https://github.com/kubeflow/pipelines/tree/8b3d741c6ef9f80190c962d4640690ea723b71e9/components/gcp/dataflow/launch_template) / [Launch Dataflow Template](https://raw.githubusercontent.com/kubeflow/pipelines/1.7.0-rc.3/components/gcp/dataflow/launch_template/component.yaml)
/ [gcp](https://github.com/kubeflow/pipelines/tree/8b3d741c6ef9f80190c962d4640690ea723b71e9/components/gcp) / [dataproc](https://github.com/kubeflow/pipelines/tree/8b3d741c6ef9f80190c962d4640690ea723b71e9/components/gcp/dataproc) / [create_cluster](https://github.com/kubeflow/pipelines/tree/8b3d741c6ef9f80190c962d4640690ea723b71e9/components/gcp/dataproc/create_cluster) / [dataproc_create_cluster](https://raw.githubusercontent.com/kubeflow/pipelines/1.7.0-rc.3/components/gcp/dataproc/create_cluster/component.yaml)
/ [gcp](https://github.com/kubeflow/pipelines/tree/8b3d741c6ef9f80190c962d4640690ea723b71e9/components/gcp) / [dataproc](https://github.com/kubeflow/pipelines/tree/8b3d741c6ef9f80190c962d4640690ea723b71e9/components/gcp/dataproc) / [delete_cluster](https://github.com/kubeflow/pipelines/tree/8b3d741c6ef9f80190c962d4640690ea723b71e9/components/gcp/dataproc/delete_cluster) / [dataproc_delete_cluster](https://raw.githubusercontent.com/kubeflow/pipelines/1.7.0-rc.3/components/gcp/dataproc/delete_cluster/component.yaml)
/ [gcp](https://github.com/kubeflow/pipelines/tree/8b3d741c6ef9f80190c962d4640690ea723b71e9/components/gcp) / [dataproc](https://github.com/kubeflow/pipelines/tree/8b3d741c6ef9f80190c962d4640690ea723b71e9/components/gcp/dataproc) / [submit_hadoop_job](https://github.com/kubeflow/pipelines/tree/8b3d741c6ef9f80190c962d4640690ea723b71e9/components/gcp/dataproc/submit_hadoop_job) / [dataproc_submit_hadoop_job](https://raw.githubusercontent.com/kubeflow/pipelines/1.7.0-rc.3/components/gcp/dataproc/submit_hadoop_job/component.yaml)
/ [gcp](https://github.com/kubeflow/pipelines/tree/8b3d741c6ef9f80190c962d4640690ea723b71e9/components/gcp) / [dataproc](https://github.com/kubeflow/pipelines/tree/8b3d741c6ef9f80190c962d4640690ea723b71e9/components/gcp/dataproc) / [submit_hive_job](https://github.com/kubeflow/pipelines/tree/8b3d741c6ef9f80190c962d4640690ea723b71e9/components/gcp/dataproc/submit_hive_job) / [dataproc_submit_hive_job](https://raw.githubusercontent.com/kubeflow/pipelines/1.7.0-rc.3/components/gcp/dataproc/submit_hive_job/component.yaml)
/ [gcp](https://github.com/kubeflow/pipelines/tree/8b3d741c6ef9f80190c962d4640690ea723b71e9/components/gcp) / [dataproc](https://github.com/kubeflow/pipelines/tree/8b3d741c6ef9f80190c962d4640690ea723b71e9/components/gcp/dataproc) / [submit_pig_job](https://github.com/kubeflow/pipelines/tree/8b3d741c6ef9f80190c962d4640690ea723b71e9/components/gcp/dataproc/submit_pig_job) / [dataproc_submit_pig_job](https://raw.githubusercontent.com/kubeflow/pipelines/1.7.0-rc.3/components/gcp/dataproc/submit_pig_job/component.yaml)
/ [gcp](https://github.com/kubeflow/pipelines/tree/8b3d741c6ef9f80190c962d4640690ea723b71e9/components/gcp) / [dataproc](https://github.com/kubeflow/pipelines/tree/8b3d741c6ef9f80190c962d4640690ea723b71e9/components/gcp/dataproc) / [submit_pyspark_job](https://github.com/kubeflow/pipelines/tree/8b3d741c6ef9f80190c962d4640690ea723b71e9/components/gcp/dataproc/submit_pyspark_job) / [dataproc_submit_pyspark_job](https://raw.githubusercontent.com/kubeflow/pipelines/1.7.0-rc.3/components/gcp/dataproc/submit_pyspark_job/component.yaml)
/ [gcp](https://github.com/kubeflow/pipelines/tree/8b3d741c6ef9f80190c962d4640690ea723b71e9/components/gcp) / [dataproc](https://github.com/kubeflow/pipelines/tree/8b3d741c6ef9f80190c962d4640690ea723b71e9/components/gcp/dataproc) / [submit_spark_job](https://github.com/kubeflow/pipelines/tree/8b3d741c6ef9f80190c962d4640690ea723b71e9/components/gcp/dataproc/submit_spark_job) / [dataproc_submit_spark_job](https://raw.githubusercontent.com/kubeflow/pipelines/1.7.0-rc.3/components/gcp/dataproc/submit_spark_job/component.yaml)
/ [gcp](https://github.com/kubeflow/pipelines/tree/8b3d741c6ef9f80190c962d4640690ea723b71e9/components/gcp) / [dataproc](https://github.com/kubeflow/pipelines/tree/8b3d741c6ef9f80190c962d4640690ea723b71e9/components/gcp/dataproc) / [submit_sparksql_job](https://github.com/kubeflow/pipelines/tree/8b3d741c6ef9f80190c962d4640690ea723b71e9/components/gcp/dataproc/submit_sparksql_job) / [dataproc_submit_sparksql_job](https://raw.githubusercontent.com/kubeflow/pipelines/1.7.0-rc.3/components/gcp/dataproc/submit_sparksql_job/component.yaml)
/ [gcp](https://github.com/kubeflow/pipelines/tree/8b3d741c6ef9f80190c962d4640690ea723b71e9/components/gcp) / [ml_engine](https://github.com/kubeflow/pipelines/tree/8b3d741c6ef9f80190c962d4640690ea723b71e9/components/gcp/ml_engine) / [batch_predict](https://github.com/kubeflow/pipelines/tree/8b3d741c6ef9f80190c962d4640690ea723b71e9/components/gcp/ml_engine/batch_predict) / [Batch predict against a model with Cloud ML Engine](https://raw.githubusercontent.com/kubeflow/pipelines/1.7.0-rc.3/components/gcp/ml_engine/batch_predict/component.yaml)
/ [gcp](https://github.com/kubeflow/pipelines/tree/8b3d741c6ef9f80190c962d4640690ea723b71e9/components/gcp) / [ml_engine](https://github.com/kubeflow/pipelines/tree/8b3d741c6ef9f80190c962d4640690ea723b71e9/components/gcp/ml_engine) / [deploy](https://github.com/kubeflow/pipelines/tree/8b3d741c6ef9f80190c962d4640690ea723b71e9/components/gcp/ml_engine/deploy) / [Deploying a trained model to Cloud Machine Learning Engine](https://raw.githubusercontent.com/kubeflow/pipelines/1.7.0-rc.3/components/gcp/ml_engine/deploy/component.yaml)
/ [gcp](https://github.com/kubeflow/pipelines/tree/8b3d741c6ef9f80190c962d4640690ea723b71e9/components/gcp) / [ml_engine](https://github.com/kubeflow/pipelines/tree/8b3d741c6ef9f80190c962d4640690ea723b71e9/components/gcp/ml_engine) / [train](https://github.com/kubeflow/pipelines/tree/8b3d741c6ef9f80190c962d4640690ea723b71e9/components/gcp/ml_engine/train) / [Submitting a Cloud ML training job as a pipeline step](https://raw.githubusercontent.com/kubeflow/pipelines/1.7.0-rc.3/components/gcp/ml_engine/train/component.yaml)
/ [git](https://github.com/kubeflow/pipelines/tree/0795597562e076437a21745e524b5c960b1edb68/components/git) / [clone](https://github.com/kubeflow/pipelines/tree/0795597562e076437a21745e524b5c960b1edb68/components/git/clone) / [Git clone](https://raw.githubusercontent.com/kubeflow/pipelines/0795597562e076437a21745e524b5c960b1edb68/components/git/clone/component.yaml)
/ [google-cloud](https://github.com/kubeflow/pipelines/tree/0795597562e076437a21745e524b5c960b1edb68/components/google-cloud) / [AutoML](https://github.com/kubeflow/pipelines/tree/0795597562e076437a21745e524b5c960b1edb68/components/google-cloud/AutoML) / [Tables](https://github.com/kubeflow/pipelines/tree/0795597562e076437a21745e524b5c960b1edb68/components/google-cloud/AutoML/Tables) / [Create_dataset](https://github.com/kubeflow/pipelines/tree/0795597562e076437a21745e524b5c960b1edb68/components/google-cloud/AutoML/Tables/Create_dataset) / [from_CSV](https://github.com/kubeflow/pipelines/tree/0795597562e076437a21745e524b5c960b1edb68/components/google-cloud/AutoML/Tables/Create_dataset/from_CSV) / [Automl create tables dataset from csv](https://raw.githubusercontent.com/kubeflow/pipelines/0795597562e076437a21745e524b5c960b1edb68/components/google-cloud/AutoML/Tables/Create_dataset/from_CSV/component.yaml)
/ [google-cloud](https://github.com/kubeflow/pipelines/tree/b601832157fec7ea914ce5bc063559a1411cc5e1/components/google-cloud) / [Optimizer](https://github.com/kubeflow/pipelines/tree/b601832157fec7ea914ce5bc063559a1411cc5e1/components/google-cloud/Optimizer) / [Add_measurement_for_trial](https://github.com/kubeflow/pipelines/tree/b601832157fec7ea914ce5bc063559a1411cc5e1/components/google-cloud/Optimizer/Add_measurement_for_trial) / [Add measurement for trial in gcp ai platform optimizer](https://raw.githubusercontent.com/kubeflow/pipelines/b601832157fec7ea914ce5bc063559a1411cc5e1/components/google-cloud/Optimizer/Add_measurement_for_trial/component.yaml)
/ [google-cloud](https://github.com/kubeflow/pipelines/tree/b601832157fec7ea914ce5bc063559a1411cc5e1/components/google-cloud) / [Optimizer](https://github.com/kubeflow/pipelines/tree/b601832157fec7ea914ce5bc063559a1411cc5e1/components/google-cloud/Optimizer) / [Create_study](https://github.com/kubeflow/pipelines/tree/b601832157fec7ea914ce5bc063559a1411cc5e1/components/google-cloud/Optimizer/Create_study) / [Create study in gcp ai platform optimizer](https://raw.githubusercontent.com/kubeflow/pipelines/b601832157fec7ea914ce5bc063559a1411cc5e1/components/google-cloud/Optimizer/Create_study/component.yaml)
/ [google-cloud](https://github.com/kubeflow/pipelines/tree/f30aa0975d246e55ee3619ca0d96bd4fdbb27f58/components/google-cloud) / [Optimizer](https://github.com/kubeflow/pipelines/tree/f30aa0975d246e55ee3619ca0d96bd4fdbb27f58/components/google-cloud/Optimizer) / [Suggest_parameter_sets_based_on_measurements](https://github.com/kubeflow/pipelines/tree/f30aa0975d246e55ee3619ca0d96bd4fdbb27f58/components/google-cloud/Optimizer/Suggest_parameter_sets_based_on_measurements) / [Suggest parameter sets from measurements using gcp ai platform optimizer](https://raw.githubusercontent.com/kubeflow/pipelines/f30aa0975d246e55ee3619ca0d96bd4fdbb27f58/components/google-cloud/Optimizer/Suggest_parameter_sets_based_on_measurements/component.yaml)
/ [google-cloud](https://github.com/kubeflow/pipelines/tree/b601832157fec7ea914ce5bc063559a1411cc5e1/components/google-cloud) / [Optimizer](https://github.com/kubeflow/pipelines/tree/b601832157fec7ea914ce5bc063559a1411cc5e1/components/google-cloud/Optimizer) / [Suggest_trials](https://github.com/kubeflow/pipelines/tree/b601832157fec7ea914ce5bc063559a1411cc5e1/components/google-cloud/Optimizer/Suggest_trials) / [Suggest trials in gcp ai platform optimizer](https://raw.githubusercontent.com/kubeflow/pipelines/b601832157fec7ea914ce5bc063559a1411cc5e1/components/google-cloud/Optimizer/Suggest_trials/component.yaml)
/ [google-cloud](https://github.com/kubeflow/pipelines/tree/0795597562e076437a21745e524b5c960b1edb68/components/google-cloud) / [storage](https://github.com/kubeflow/pipelines/tree/0795597562e076437a21745e524b5c960b1edb68/components/google-cloud/storage) / [download](https://github.com/kubeflow/pipelines/tree/0795597562e076437a21745e524b5c960b1edb68/components/google-cloud/storage/download) / [Download from GCS](https://raw.githubusercontent.com/kubeflow/pipelines/0795597562e076437a21745e524b5c960b1edb68/components/google-cloud/storage/download/component.yaml)
/ [google-cloud](https://github.com/kubeflow/pipelines/tree/0795597562e076437a21745e524b5c960b1edb68/components/google-cloud) / [storage](https://github.com/kubeflow/pipelines/tree/0795597562e076437a21745e524b5c960b1edb68/components/google-cloud/storage) / [download_blob](https://github.com/kubeflow/pipelines/tree/0795597562e076437a21745e524b5c960b1edb68/components/google-cloud/storage/download_blob) / [Download from GCS](https://raw.githubusercontent.com/kubeflow/pipelines/0795597562e076437a21745e524b5c960b1edb68/components/google-cloud/storage/download_blob/component.yaml)
/ [google-cloud](https://github.com/kubeflow/pipelines/tree/0795597562e076437a21745e524b5c960b1edb68/components/google-cloud) / [storage](https://github.com/kubeflow/pipelines/tree/0795597562e076437a21745e524b5c960b1edb68/components/google-cloud/storage) / [download_dir](https://github.com/kubeflow/pipelines/tree/0795597562e076437a21745e524b5c960b1edb68/components/google-cloud/storage/download_dir) / [Download from GCS](https://raw.githubusercontent.com/kubeflow/pipelines/0795597562e076437a21745e524b5c960b1edb68/components/google-cloud/storage/download_dir/component.yaml)
/ [google-cloud](https://github.com/kubeflow/pipelines/tree/0795597562e076437a21745e524b5c960b1edb68/components/google-cloud) / [storage](https://github.com/kubeflow/pipelines/tree/0795597562e076437a21745e524b5c960b1edb68/components/google-cloud/storage) / [list](https://github.com/kubeflow/pipelines/tree/0795597562e076437a21745e524b5c960b1edb68/components/google-cloud/storage/list) / [List blobs](https://raw.githubusercontent.com/kubeflow/pipelines/0795597562e076437a21745e524b5c960b1edb68/components/google-cloud/storage/list/component.yaml)
/ [google-cloud](https://github.com/kubeflow/pipelines/tree/0795597562e076437a21745e524b5c960b1edb68/components/google-cloud) / [storage](https://github.com/kubeflow/pipelines/tree/0795597562e076437a21745e524b5c960b1edb68/components/google-cloud/storage) / [upload_to_explicit_uri](https://github.com/kubeflow/pipelines/tree/0795597562e076437a21745e524b5c960b1edb68/components/google-cloud/storage/upload_to_explicit_uri) / [Upload to GCS](https://raw.githubusercontent.com/kubeflow/pipelines/0795597562e076437a21745e524b5c960b1edb68/components/google-cloud/storage/upload_to_explicit_uri/component.yaml)
/ [google-cloud](https://github.com/kubeflow/pipelines/tree/0795597562e076437a21745e524b5c960b1edb68/components/google-cloud) / [storage](https://github.com/kubeflow/pipelines/tree/0795597562e076437a21745e524b5c960b1edb68/components/google-cloud/storage) / [upload_to_unique_uri](https://github.com/kubeflow/pipelines/tree/0795597562e076437a21745e524b5c960b1edb68/components/google-cloud/storage/upload_to_unique_uri) / [Upload to GCS](https://raw.githubusercontent.com/kubeflow/pipelines/0795597562e076437a21745e524b5c960b1edb68/components/google-cloud/storage/upload_to_unique_uri/component.yaml)
/ [ibm-components](https://github.com/kubeflow/pipelines/tree/0795597562e076437a21745e524b5c960b1edb68/components/ibm-components) / [commons](https://github.com/kubeflow/pipelines/tree/0795597562e076437a21745e524b5c960b1edb68/components/ibm-components/commons) / [config](https://github.com/kubeflow/pipelines/tree/0795597562e076437a21745e524b5c960b1edb68/components/ibm-components/commons/config) / [Create Secret - Kubernetes Cluster](https://raw.githubusercontent.com/kubeflow/pipelines/0795597562e076437a21745e524b5c960b1edb68/components/ibm-components/commons/config/component.yaml)
/ [ibm-components](https://github.com/kubeflow/pipelines/tree/121bfdfc5f832948a799a6e93d7c1cd985c5d254/components/ibm-components) / [ffdl](https://github.com/kubeflow/pipelines/tree/121bfdfc5f832948a799a6e93d7c1cd985c5d254/components/ibm-components/ffdl) / [serve](https://github.com/kubeflow/pipelines/tree/121bfdfc5f832948a799a6e93d7c1cd985c5d254/components/ibm-components/ffdl/serve) / [Serve PyTorch Model - Seldon Core](https://raw.githubusercontent.com/kubeflow/pipelines/121bfdfc5f832948a799a6e93d7c1cd985c5d254/components/ibm-components/ffdl/serve/component.yaml)
/ [ibm-components](https://github.com/kubeflow/pipelines/tree/121bfdfc5f832948a799a6e93d7c1cd985c5d254/components/ibm-components) / [ffdl](https://github.com/kubeflow/pipelines/tree/121bfdfc5f832948a799a6e93d7c1cd985c5d254/components/ibm-components/ffdl) / [train](https://github.com/kubeflow/pipelines/tree/121bfdfc5f832948a799a6e93d7c1cd985c5d254/components/ibm-components/ffdl/train) / [Train Model - FfDL](https://raw.githubusercontent.com/kubeflow/pipelines/121bfdfc5f832948a799a6e93d7c1cd985c5d254/components/ibm-components/ffdl/train/component.yaml)
/ [ibm-components](https://github.com/kubeflow/pipelines/tree/121bfdfc5f832948a799a6e93d7c1cd985c5d254/components/ibm-components) / [spark](https://github.com/kubeflow/pipelines/tree/121bfdfc5f832948a799a6e93d7c1cd985c5d254/components/ibm-components/spark) / [data_preprocess_spark](https://github.com/kubeflow/pipelines/tree/121bfdfc5f832948a799a6e93d7c1cd985c5d254/components/ibm-components/spark/data_preprocess_spark) / [Preprocess Data using Spark - IBM Cloud](https://raw.githubusercontent.com/kubeflow/pipelines/121bfdfc5f832948a799a6e93d7c1cd985c5d254/components/ibm-components/spark/data_preprocess_spark/component.yaml)
/ [ibm-components](https://github.com/kubeflow/pipelines/tree/121bfdfc5f832948a799a6e93d7c1cd985c5d254/components/ibm-components) / [spark](https://github.com/kubeflow/pipelines/tree/121bfdfc5f832948a799a6e93d7c1cd985c5d254/components/ibm-components/spark) / [store_spark_model](https://github.com/kubeflow/pipelines/tree/121bfdfc5f832948a799a6e93d7c1cd985c5d254/components/ibm-components/spark/store_spark_model) / [Store Spark Model - Watson Machine Learning](https://raw.githubusercontent.com/kubeflow/pipelines/121bfdfc5f832948a799a6e93d7c1cd985c5d254/components/ibm-components/spark/store_spark_model/component.yaml)
/ [ibm-components](https://github.com/kubeflow/pipelines/tree/121bfdfc5f832948a799a6e93d7c1cd985c5d254/components/ibm-components) / [spark](https://github.com/kubeflow/pipelines/tree/121bfdfc5f832948a799a6e93d7c1cd985c5d254/components/ibm-components/spark) / [train_spark](https://github.com/kubeflow/pipelines/tree/121bfdfc5f832948a799a6e93d7c1cd985c5d254/components/ibm-components/spark/train_spark) / [Train Spark Model - IBM Cloud](https://raw.githubusercontent.com/kubeflow/pipelines/121bfdfc5f832948a799a6e93d7c1cd985c5d254/components/ibm-components/spark/train_spark/component.yaml)
/ [ibm-components](https://github.com/kubeflow/pipelines/tree/0795597562e076437a21745e524b5c960b1edb68/components/ibm-components) / [watson](https://github.com/kubeflow/pipelines/tree/0795597562e076437a21745e524b5c960b1edb68/components/ibm-components/watson) / [deploy](https://github.com/kubeflow/pipelines/tree/0795597562e076437a21745e524b5c960b1edb68/components/ibm-components/watson/deploy) / [Deploy Model - Watson Machine Learning](https://raw.githubusercontent.com/kubeflow/pipelines/0795597562e076437a21745e524b5c960b1edb68/components/ibm-components/watson/deploy/component.yaml)
/ [ibm-components](https://github.com/kubeflow/pipelines/tree/0795597562e076437a21745e524b5c960b1edb68/components/ibm-components) / [watson](https://github.com/kubeflow/pipelines/tree/0795597562e076437a21745e524b5c960b1edb68/components/ibm-components/watson) / [manage](https://github.com/kubeflow/pipelines/tree/0795597562e076437a21745e524b5c960b1edb68/components/ibm-components/watson/manage) / [monitor_fairness](https://github.com/kubeflow/pipelines/tree/0795597562e076437a21745e524b5c960b1edb68/components/ibm-components/watson/manage/monitor_fairness) / [Monitor Fairness - Watson OpenScale](https://raw.githubusercontent.com/kubeflow/pipelines/0795597562e076437a21745e524b5c960b1edb68/components/ibm-components/watson/manage/monitor_fairness/component.yaml)
/ [ibm-components](https://github.com/kubeflow/pipelines/tree/0795597562e076437a21745e524b5c960b1edb68/components/ibm-components) / [watson](https://github.com/kubeflow/pipelines/tree/0795597562e076437a21745e524b5c960b1edb68/components/ibm-components/watson) / [manage](https://github.com/kubeflow/pipelines/tree/0795597562e076437a21745e524b5c960b1edb68/components/ibm-components/watson/manage) / [monitor_quality](https://github.com/kubeflow/pipelines/tree/0795597562e076437a21745e524b5c960b1edb68/components/ibm-components/watson/manage/monitor_quality) / [Monitor quality - Watson OpenScale](https://raw.githubusercontent.com/kubeflow/pipelines/0795597562e076437a21745e524b5c960b1edb68/components/ibm-components/watson/manage/monitor_quality/component.yaml)
/ [ibm-components](https://github.com/kubeflow/pipelines/tree/121bfdfc5f832948a799a6e93d7c1cd985c5d254/components/ibm-components) / [watson](https://github.com/kubeflow/pipelines/tree/121bfdfc5f832948a799a6e93d7c1cd985c5d254/components/ibm-components/watson) / [manage](https://github.com/kubeflow/pipelines/tree/121bfdfc5f832948a799a6e93d7c1cd985c5d254/components/ibm-components/watson/manage) / [subscribe](https://github.com/kubeflow/pipelines/tree/121bfdfc5f832948a799a6e93d7c1cd985c5d254/components/ibm-components/watson/manage/subscribe) / [Subscribe - Watson OpenScale](https://raw.githubusercontent.com/kubeflow/pipelines/121bfdfc5f832948a799a6e93d7c1cd985c5d254/components/ibm-components/watson/manage/subscribe/component.yaml)
/ [ibm-components](https://github.com/kubeflow/pipelines/tree/0795597562e076437a21745e524b5c960b1edb68/components/ibm-components) / [watson](https://github.com/kubeflow/pipelines/tree/0795597562e076437a21745e524b5c960b1edb68/components/ibm-components/watson) / [store](https://github.com/kubeflow/pipelines/tree/0795597562e076437a21745e524b5c960b1edb68/components/ibm-components/watson/store) / [Store model - Watson Machine Learning](https://raw.githubusercontent.com/kubeflow/pipelines/0795597562e076437a21745e524b5c960b1edb68/components/ibm-components/watson/store/component.yaml)
/ [ibm-components](https://github.com/kubeflow/pipelines/tree/0795597562e076437a21745e524b5c960b1edb68/components/ibm-components) / [watson](https://github.com/kubeflow/pipelines/tree/0795597562e076437a21745e524b5c960b1edb68/components/ibm-components/watson) / [train](https://github.com/kubeflow/pipelines/tree/0795597562e076437a21745e524b5c960b1edb68/components/ibm-components/watson/train) / [Train Model - Watson Machine Learning](https://raw.githubusercontent.com/kubeflow/pipelines/0795597562e076437a21745e524b5c960b1edb68/components/ibm-components/watson/train/component.yaml)
/ [json](https://github.com/kubeflow/pipelines/tree/616c48babede5cde4ef20b6c35cea24c66b1c44d/components/json) / [Build_dict](https://github.com/kubeflow/pipelines/tree/616c48babede5cde4ef20b6c35cea24c66b1c44d/components/json/Build_dict) / [Build dict](https://raw.githubusercontent.com/kubeflow/pipelines/616c48babede5cde4ef20b6c35cea24c66b1c44d/components/json/Build_dict/component.yaml)
/ [json](https://github.com/kubeflow/pipelines/tree/616c48babede5cde4ef20b6c35cea24c66b1c44d/components/json) / [Build_list](https://github.com/kubeflow/pipelines/tree/616c48babede5cde4ef20b6c35cea24c66b1c44d/components/json/Build_list) / [Build list](https://raw.githubusercontent.com/kubeflow/pipelines/616c48babede5cde4ef20b6c35cea24c66b1c44d/components/json/Build_list/component.yaml)
/ [json](https://github.com/kubeflow/pipelines/tree/616c48babede5cde4ef20b6c35cea24c66b1c44d/components/json) / [Combine_lists](https://github.com/kubeflow/pipelines/tree/616c48babede5cde4ef20b6c35cea24c66b1c44d/components/json/Combine_lists) / [Combine lists](https://raw.githubusercontent.com/kubeflow/pipelines/616c48babede5cde4ef20b6c35cea24c66b1c44d/components/json/Combine_lists/component.yaml)
/ [json](https://github.com/kubeflow/pipelines/tree/b656fbf41e4801c4264ca3cb217a7913ac825a3d/components/json) / [Get_element_by_index](https://github.com/kubeflow/pipelines/tree/b656fbf41e4801c4264ca3cb217a7913ac825a3d/components/json/Get_element_by_index) / [Get element by index from JSON](https://raw.githubusercontent.com/kubeflow/pipelines/b656fbf41e4801c4264ca3cb217a7913ac825a3d/components/json/Get_element_by_index/component.yaml)
/ [json](https://github.com/kubeflow/pipelines/tree/b656fbf41e4801c4264ca3cb217a7913ac825a3d/components/json) / [Get_element_by_key](https://github.com/kubeflow/pipelines/tree/b656fbf41e4801c4264ca3cb217a7913ac825a3d/components/json/Get_element_by_key) / [Get element by key from JSON](https://raw.githubusercontent.com/kubeflow/pipelines/b656fbf41e4801c4264ca3cb217a7913ac825a3d/components/json/Get_element_by_key/component.yaml)
/ [json](https://github.com/kubeflow/pipelines/tree/b656fbf41e4801c4264ca3cb217a7913ac825a3d/components/json) / [Query](https://github.com/kubeflow/pipelines/tree/b656fbf41e4801c4264ca3cb217a7913ac825a3d/components/json/Query) / [Query JSON using JQ](https://raw.githubusercontent.com/kubeflow/pipelines/b656fbf41e4801c4264ca3cb217a7913ac825a3d/components/json/Query/component.yaml)
/ [keras](https://github.com/kubeflow/pipelines/tree/0795597562e076437a21745e524b5c960b1edb68/components/keras) / [Train_classifier](https://github.com/kubeflow/pipelines/tree/0795597562e076437a21745e524b5c960b1edb68/components/keras/Train_classifier) / [from_CSV](https://github.com/kubeflow/pipelines/tree/0795597562e076437a21745e524b5c960b1edb68/components/keras/Train_classifier/from_CSV) / [Keras train classifier from csv](https://raw.githubusercontent.com/kubeflow/pipelines/0795597562e076437a21745e524b5c960b1edb68/components/keras/Train_classifier/from_CSV/component.yaml)
/ [kubeflow](https://github.com/kubeflow/pipelines/tree/8b3d741c6ef9f80190c962d4640690ea723b71e9/components/kubeflow) / [deployer](https://github.com/kubeflow/pipelines/tree/8b3d741c6ef9f80190c962d4640690ea723b71e9/components/kubeflow/deployer) / [Kubeflow - Serve TF model](https://raw.githubusercontent.com/kubeflow/pipelines/1.8.0-alpha.0/components/kubeflow/deployer/component.yaml)
/ [kubeflow](https://github.com/kubeflow/pipelines/tree/8b3d741c6ef9f80190c962d4640690ea723b71e9/components/kubeflow) / [dnntrainer](https://github.com/kubeflow/pipelines/tree/8b3d741c6ef9f80190c962d4640690ea723b71e9/components/kubeflow/dnntrainer) / [Train FC DNN using TF](https://raw.githubusercontent.com/kubeflow/pipelines/1.8.0-alpha.0/components/kubeflow/dnntrainer/component.yaml)
/ [kubeflow](https://github.com/kubeflow/pipelines/tree/34d23aa924720ead13fe67ebca5c1ab9926585ee/components/kubeflow) / [katib-launcher](https://github.com/kubeflow/pipelines/tree/34d23aa924720ead13fe67ebca5c1ab9926585ee/components/kubeflow/katib-launcher) / [Katib - Launch Experiment](https://raw.githubusercontent.com/kubeflow/pipelines/34d23aa924720ead13fe67ebca5c1ab9926585ee/components/kubeflow/katib-launcher/component.yaml)
/ [kubeflow](https://github.com/kubeflow/pipelines/tree/65bed9b6d1d676ef2d541a970d3edc0aee12400d/components/kubeflow) / [kfserving](https://github.com/kubeflow/pipelines/tree/65bed9b6d1d676ef2d541a970d3edc0aee12400d/components/kubeflow/kfserving) / [Kubeflow - Serve Model using KFServing](https://raw.githubusercontent.com/kubeflow/pipelines/65bed9b6d1d676ef2d541a970d3edc0aee12400d/components/kubeflow/kfserving/component.yaml)
/ [kubeflow](https://github.com/kubeflow/pipelines/tree/dd31142a57053e0b6f1416a3ecb4c8a94faa27f9/components/kubeflow) / [launcher](https://github.com/kubeflow/pipelines/tree/dd31142a57053e0b6f1416a3ecb4c8a94faa27f9/components/kubeflow/launcher) / [Kubeflow - Launch TFJob](https://raw.githubusercontent.com/kubeflow/pipelines/dd31142a57053e0b6f1416a3ecb4c8a94faa27f9/components/kubeflow/launcher/component.yaml)
/ [local](https://github.com/kubeflow/pipelines/tree/8b3d741c6ef9f80190c962d4640690ea723b71e9/components/local) / [confusion_matrix](https://github.com/kubeflow/pipelines/tree/8b3d741c6ef9f80190c962d4640690ea723b71e9/components/local/confusion_matrix) / [Confusion matrix](https://raw.githubusercontent.com/kubeflow/pipelines/1.8.0-alpha.0/components/local/confusion_matrix/component.yaml)
/ [local](https://github.com/kubeflow/pipelines/tree/8b3d741c6ef9f80190c962d4640690ea723b71e9/components/local) / [roc](https://github.com/kubeflow/pipelines/tree/8b3d741c6ef9f80190c962d4640690ea723b71e9/components/local/roc) / [ROC curve](https://raw.githubusercontent.com/kubeflow/pipelines/1.8.0-alpha.0/components/local/roc/component.yaml)
/ [ml_metrics](https://github.com/kubeflow/pipelines/tree/0795597562e076437a21745e524b5c960b1edb68/components/ml_metrics) / [Aggregate_regression_metrics](https://github.com/kubeflow/pipelines/tree/0795597562e076437a21745e524b5c960b1edb68/components/ml_metrics/Aggregate_regression_metrics) / [Aggregate regression metrics](https://raw.githubusercontent.com/kubeflow/pipelines/0795597562e076437a21745e524b5c960b1edb68/components/ml_metrics/Aggregate_regression_metrics/component.yaml)
/ [ml_metrics](https://github.com/kubeflow/pipelines/tree/0795597562e076437a21745e524b5c960b1edb68/components/ml_metrics) / [Calculate_classification_metrics](https://github.com/kubeflow/pipelines/tree/0795597562e076437a21745e524b5c960b1edb68/components/ml_metrics/Calculate_classification_metrics) / [from_CSV](https://github.com/kubeflow/pipelines/tree/0795597562e076437a21745e524b5c960b1edb68/components/ml_metrics/Calculate_classification_metrics/from_CSV) / [Calculate classification metrics from csv](https://raw.githubusercontent.com/kubeflow/pipelines/0795597562e076437a21745e524b5c960b1edb68/components/ml_metrics/Calculate_classification_metrics/from_CSV/component.yaml)
/ [ml_metrics](https://github.com/kubeflow/pipelines/tree/0795597562e076437a21745e524b5c960b1edb68/components/ml_metrics) / [Calculate_regression_metrics](https://github.com/kubeflow/pipelines/tree/0795597562e076437a21745e524b5c960b1edb68/components/ml_metrics/Calculate_regression_metrics) / [from_CSV](https://github.com/kubeflow/pipelines/tree/0795597562e076437a21745e524b5c960b1edb68/components/ml_metrics/Calculate_regression_metrics/from_CSV) / [Calculate regression metrics from csv](https://raw.githubusercontent.com/kubeflow/pipelines/0795597562e076437a21745e524b5c960b1edb68/components/ml_metrics/Calculate_regression_metrics/from_CSV/component.yaml)
/ [notebooks](https://github.com/kubeflow/pipelines/tree/b656fbf41e4801c4264ca3cb217a7913ac825a3d/components/notebooks) / [Run_notebook_using_papermill](https://github.com/kubeflow/pipelines/tree/b656fbf41e4801c4264ca3cb217a7913ac825a3d/components/notebooks/Run_notebook_using_papermill) / [Run notebook using papermill](https://raw.githubusercontent.com/kubeflow/pipelines/b656fbf41e4801c4264ca3cb217a7913ac825a3d/components/notebooks/Run_notebook_using_papermill/component.yaml)
/ [nuclio](https://github.com/kubeflow/pipelines/tree/0795597562e076437a21745e524b5c960b1edb68/components/nuclio) / [delete](https://github.com/kubeflow/pipelines/tree/0795597562e076437a21745e524b5c960b1edb68/components/nuclio/delete) / [nuclio delete](https://raw.githubusercontent.com/kubeflow/pipelines/0795597562e076437a21745e524b5c960b1edb68/components/nuclio/delete/component.yaml)
/ [nuclio](https://github.com/kubeflow/pipelines/tree/0795597562e076437a21745e524b5c960b1edb68/components/nuclio) / [deploy](https://github.com/kubeflow/pipelines/tree/0795597562e076437a21745e524b5c960b1edb68/components/nuclio/deploy) / [nuclio deploy](https://raw.githubusercontent.com/kubeflow/pipelines/0795597562e076437a21745e524b5c960b1edb68/components/nuclio/deploy/component.yaml)
/ [nuclio](https://github.com/kubeflow/pipelines/tree/0795597562e076437a21745e524b5c960b1edb68/components/nuclio) / [invoker](https://github.com/kubeflow/pipelines/tree/0795597562e076437a21745e524b5c960b1edb68/components/nuclio/invoker) / [nuclio invoker](https://raw.githubusercontent.com/kubeflow/pipelines/0795597562e076437a21745e524b5c960b1edb68/components/nuclio/invoker/component.yaml)
/ [pandas](https://github.com/kubeflow/pipelines/tree/0795597562e076437a21745e524b5c960b1edb68/components/pandas) / [Transform_DataFrame](https://github.com/kubeflow/pipelines/tree/0795597562e076437a21745e524b5c960b1edb68/components/pandas/Transform_DataFrame) / [in_ApacheParquet_format](https://github.com/kubeflow/pipelines/tree/0795597562e076437a21745e524b5c960b1edb68/components/pandas/Transform_DataFrame/in_ApacheParquet_format) / [Pandas Transform DataFrame in ApacheParquet format](https://raw.githubusercontent.com/kubeflow/pipelines/0795597562e076437a21745e524b5c960b1edb68/components/pandas/Transform_DataFrame/in_ApacheParquet_format/component.yaml)
/ [pandas](https://github.com/kubeflow/pipelines/tree/0795597562e076437a21745e524b5c960b1edb68/components/pandas) / [Transform_DataFrame](https://github.com/kubeflow/pipelines/tree/0795597562e076437a21745e524b5c960b1edb68/components/pandas/Transform_DataFrame) / [in_CSV_format](https://github.com/kubeflow/pipelines/tree/0795597562e076437a21745e524b5c960b1edb68/components/pandas/Transform_DataFrame/in_CSV_format) / [Pandas Transform DataFrame in CSV format](https://raw.githubusercontent.com/kubeflow/pipelines/0795597562e076437a21745e524b5c960b1edb68/components/pandas/Transform_DataFrame/in_CSV_format/component.yaml)
/ [presto](https://github.com/kubeflow/pipelines/tree/0795597562e076437a21745e524b5c960b1edb68/components/presto) / [query](https://github.com/kubeflow/pipelines/tree/0795597562e076437a21745e524b5c960b1edb68/components/presto/query) / [Presto Query](https://raw.githubusercontent.com/kubeflow/pipelines/0795597562e076437a21745e524b5c960b1edb68/components/presto/query/component.yaml)
/ [sample](https://github.com/kubeflow/pipelines/tree/c5e6ca23176355e3c01b6a778bfed46d3d041be8/components/sample) / [C%23_script](https://github.com/kubeflow/pipelines/tree/c5e6ca23176355e3c01b6a778bfed46d3d041be8/components/sample/C%23_script) / [Filter text](https://raw.githubusercontent.com/kubeflow/pipelines/c5e6ca23176355e3c01b6a778bfed46d3d041be8/components/sample/C%23_script/component.yaml)
/ [sample](https://github.com/kubeflow/pipelines/tree/0795597562e076437a21745e524b5c960b1edb68/components/sample) / [keras](https://github.com/kubeflow/pipelines/tree/0795597562e076437a21745e524b5c960b1edb68/components/sample/keras) / [train_classifier](https://github.com/kubeflow/pipelines/tree/0795597562e076437a21745e524b5c960b1edb68/components/sample/keras/train_classifier) / [Keras - Train classifier](https://raw.githubusercontent.com/kubeflow/pipelines/0795597562e076437a21745e524b5c960b1edb68/components/sample/keras/train_classifier/component.yaml)
/ [sample](https://github.com/kubeflow/pipelines/tree/3826edfcde77eb92a47dea1b9241a657236f45bc/components/sample) / [R_script](https://github.com/kubeflow/pipelines/tree/3826edfcde77eb92a47dea1b9241a657236f45bc/components/sample/R_script) / [Filter text](https://raw.githubusercontent.com/kubeflow/pipelines/3826edfcde77eb92a47dea1b9241a657236f45bc/components/sample/R_script/component.yaml)
/ [sample](https://github.com/kubeflow/pipelines/tree/1417e5e794103164a2836f86116666ef965bf1f5/components/sample) / [Shell_script](https://github.com/kubeflow/pipelines/tree/1417e5e794103164a2836f86116666ef965bf1f5/components/sample/Shell_script) / [Filter text using shell and grep](https://raw.githubusercontent.com/kubeflow/pipelines/1417e5e794103164a2836f86116666ef965bf1f5/components/sample/Shell_script/component.yaml)
/ [tables](https://github.com/kubeflow/pipelines/tree/0795597562e076437a21745e524b5c960b1edb68/components/tables) / [Remove_header](https://github.com/kubeflow/pipelines/tree/0795597562e076437a21745e524b5c960b1edb68/components/tables/Remove_header) / [Remove header](https://raw.githubusercontent.com/kubeflow/pipelines/0795597562e076437a21745e524b5c960b1edb68/components/tables/Remove_header/component.yaml)
/ [tensorflow](https://github.com/kubeflow/pipelines/tree/0795597562e076437a21745e524b5c960b1edb68/components/tensorflow) / [tensorboard](https://github.com/kubeflow/pipelines/tree/0795597562e076437a21745e524b5c960b1edb68/components/tensorflow/tensorboard) / [prepare_tensorboard](https://github.com/kubeflow/pipelines/tree/0795597562e076437a21745e524b5c960b1edb68/components/tensorflow/tensorboard/prepare_tensorboard) / [Create Tensorboard visualization](https://raw.githubusercontent.com/kubeflow/pipelines/0795597562e076437a21745e524b5c960b1edb68/components/tensorflow/tensorboard/prepare_tensorboard/component.yaml)
/ [web](https://github.com/kubeflow/pipelines/tree/0795597562e076437a21745e524b5c960b1edb68/components/web) / [Download](https://github.com/kubeflow/pipelines/tree/0795597562e076437a21745e524b5c960b1edb68/components/web/Download) / [Download data](https://raw.githubusercontent.com/kubeflow/pipelines/0795597562e076437a21745e524b5c960b1edb68/components/web/Download/component.yaml)
/ [XGBoost](https://github.com/kubeflow/pipelines/tree/0795597562e076437a21745e524b5c960b1edb68/components/XGBoost) / [Cross_validation_for_regression](https://github.com/kubeflow/pipelines/tree/0795597562e076437a21745e524b5c960b1edb68/components/XGBoost/Cross_validation_for_regression) / [from_CSV](https://github.com/kubeflow/pipelines/tree/0795597562e076437a21745e524b5c960b1edb68/components/XGBoost/Cross_validation_for_regression/from_CSV) / [Xgboost 5 fold cross validation for regression](https://raw.githubusercontent.com/kubeflow/pipelines/0795597562e076437a21745e524b5c960b1edb68/components/XGBoost/Cross_validation_for_regression/from_CSV/component.yaml)
/ [XGBoost](https://github.com/kubeflow/pipelines/tree/0795597562e076437a21745e524b5c960b1edb68/components/XGBoost) / [Predict](https://github.com/kubeflow/pipelines/tree/0795597562e076437a21745e524b5c960b1edb68/components/XGBoost/Predict) / [Xgboost predict](https://raw.githubusercontent.com/kubeflow/pipelines/0795597562e076437a21745e524b5c960b1edb68/components/XGBoost/Predict/component.yaml)
/ [XGBoost](https://github.com/kubeflow/pipelines/tree/0795597562e076437a21745e524b5c960b1edb68/components/XGBoost) / [Predict](https://github.com/kubeflow/pipelines/tree/0795597562e076437a21745e524b5c960b1edb68/components/XGBoost/Predict) / [from_ApacheParquet](https://github.com/kubeflow/pipelines/tree/0795597562e076437a21745e524b5c960b1edb68/components/XGBoost/Predict/from_ApacheParquet) / [Xgboost predict](https://raw.githubusercontent.com/kubeflow/pipelines/0795597562e076437a21745e524b5c960b1edb68/components/XGBoost/Predict/from_ApacheParquet/component.yaml)
/ [XGBoost](https://github.com/kubeflow/pipelines/tree/0795597562e076437a21745e524b5c960b1edb68/components/XGBoost) / [Train](https://github.com/kubeflow/pipelines/tree/0795597562e076437a21745e524b5c960b1edb68/components/XGBoost/Train) / [Xgboost train](https://raw.githubusercontent.com/kubeflow/pipelines/0795597562e076437a21745e524b5c960b1edb68/components/XGBoost/Train/component.yaml)
/ [XGBoost](https://github.com/kubeflow/pipelines/tree/0795597562e076437a21745e524b5c960b1edb68/components/XGBoost) / [Train](https://github.com/kubeflow/pipelines/tree/0795597562e076437a21745e524b5c960b1edb68/components/XGBoost/Train) / [from_ApacheParquet](https://github.com/kubeflow/pipelines/tree/0795597562e076437a21745e524b5c960b1edb68/components/XGBoost/Train/from_ApacheParquet) / [Xgboost train](https://raw.githubusercontent.com/kubeflow/pipelines/0795597562e076437a21745e524b5c960b1edb68/components/XGBoost/Train/from_ApacheParquet/component.yaml)
/ [XGBoost](https://github.com/kubeflow/pipelines/tree/0795597562e076437a21745e524b5c960b1edb68/components/XGBoost) / [Train_and_cross-validate_regression](https://github.com/kubeflow/pipelines/tree/0795597562e076437a21745e524b5c960b1edb68/components/XGBoost/Train_and_cross-validate_regression) / [from_CSV](https://github.com/kubeflow/pipelines/tree/0795597562e076437a21745e524b5c960b1edb68/components/XGBoost/Train_and_cross-validate_regression/from_CSV) / [Xgboost train and cv regression on csv](https://raw.githubusercontent.com/kubeflow/pipelines/0795597562e076437a21745e524b5c960b1edb68/components/XGBoost/Train_and_cross-validate_regression/from_CSV/component.yaml)
/ [XGBoost](https://github.com/kubeflow/pipelines/tree/0795597562e076437a21745e524b5c960b1edb68/components/XGBoost) / [Train_regression_and_calculate_metrics](https://github.com/kubeflow/pipelines/tree/0795597562e076437a21745e524b5c960b1edb68/components/XGBoost/Train_regression_and_calculate_metrics) / [from_CSV](https://github.com/kubeflow/pipelines/tree/0795597562e076437a21745e524b5c960b1edb68/components/XGBoost/Train_regression_and_calculate_metrics/from_CSV) / [Xgboost train regression and calculate metrics on csv](https://raw.githubusercontent.com/kubeflow/pipelines/0795597562e076437a21745e524b5c960b1edb68/components/XGBoost/Train_regression_and_calculate_metrics/from_CSV/component.yaml)
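
The index above pins every component to a specific commit SHA or release tag, which keeps each referenced `component.yaml` (and its digest) stable over time. A minimal sketch of consuming one of these pinned URLs with the kfp v1 SDK; the pipeline name and the Download component's `url` parameter are illustrative assumptions:

```python
import kfp
from kfp import components, dsl

# Pinned URL taken verbatim from the index above.
download_op = components.load_component_from_url(
    'https://raw.githubusercontent.com/kubeflow/pipelines/0795597562e076437a21745e524b5c960b1edb68/components/web/Download/component.yaml'
)

@dsl.pipeline(name='download-example')
def download_pipeline(url: str):
    # Single-output components expose their result as `.output`.
    download_task = download_op(url)

if __name__ == '__main__':
    kfp.compiler.Compiler().compile(download_pipeline, 'download_pipeline.yaml')
```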

View File

@ -1,71 +0,0 @@
from collections import OrderedDict

from kfp import components

split_table_into_folds_op = components.load_component_from_url('https://raw.githubusercontent.com/kubeflow/pipelines/e9b4b29b22a5120daf95b581b0392cd461a906f0/components/dataset_manipulation/split_data_into_folds/in_CSV/component.yaml')
xgboost_train_on_csv_op = components.load_component_from_url('https://raw.githubusercontent.com/kubeflow/pipelines/567c04c51ff00a1ee525b3458425b17adbe3df61/components/XGBoost/Train/component.yaml')
xgboost_predict_on_csv_op = components.load_component_from_url('https://raw.githubusercontent.com/kubeflow/pipelines/567c04c51ff00a1ee525b3458425b17adbe3df61/components/XGBoost/Predict/component.yaml')
pandas_transform_csv_op = components.load_component_from_url('https://raw.githubusercontent.com/kubeflow/pipelines/6162d55998b176b50267d351241100bb0ee715bc/components/pandas/Transform_DataFrame/in_CSV_format/component.yaml')
drop_header_op = components.load_component_from_url('https://raw.githubusercontent.com/kubeflow/pipelines/02c9638287468c849632cf9f7885b51de4c66f86/components/tables/Remove_header/component.yaml')
calculate_regression_metrics_from_csv_op = components.load_component_from_url('https://raw.githubusercontent.com/kubeflow/pipelines/7da1ac9464b4b3e7d95919faa2f1107a9635b7e4/components/ml_metrics/Calculate_regression_metrics/from_CSV/component.yaml')
aggregate_regression_metrics_op = components.load_component_from_url('https://raw.githubusercontent.com/kubeflow/pipelines/7ea9363fe201918d419fecdc00d1275e657ff712/components/ml_metrics/Aggregate_regression_metrics/component.yaml')


def xgboost_5_fold_cross_validation_for_regression(
    data: 'CSV',
    label_column: int = 0,
    objective: str = 'reg:squarederror',
    num_iterations: int = 200,
):
    # Split the dataset into 5 train/test fold pairs: train_1..train_5 and test_1..test_5.
    folds = split_table_into_folds_op(data).outputs

    fold_metrics = {}
    for i in range(1, 6):
        training_data = folds['train_' + str(i)]
        testing_data = folds['test_' + str(i)]

        # Train a model on the fold's training split.
        model = xgboost_train_on_csv_op(
            training_data=training_data,
            label_column=label_column,
            objective=objective,
            num_iterations=num_iterations,
        ).outputs['model']

        # Predict on the held-out split.
        predictions = xgboost_predict_on_csv_op(
            data=testing_data,
            model=model,
            label_column=label_column,
        ).output

        # Extract the ground-truth "tips" label column and strip the CSV header
        # so it can be compared against the raw predictions.
        true_values_table = pandas_transform_csv_op(
            table=testing_data,
            transform_code='df = df[["tips"]]',
        ).output
        true_values = drop_header_op(true_values_table).output

        metrics = calculate_regression_metrics_from_csv_op(
            true_values=true_values,
            predicted_values=predictions,
        ).outputs['metrics']
        fold_metrics['metrics_' + str(i)] = metrics

    # Aggregate the five per-fold metric sets into a single summary.
    aggregated_metrics_task = aggregate_regression_metrics_op(**fold_metrics)

    return OrderedDict([
        ('mean_absolute_error', aggregated_metrics_task.outputs['mean_absolute_error']),
        ('mean_squared_error', aggregated_metrics_task.outputs['mean_squared_error']),
        ('root_mean_squared_error', aggregated_metrics_task.outputs['root_mean_squared_error']),
        ('metrics', aggregated_metrics_task.outputs['metrics']),
    ])


if __name__ == '__main__':
    xgboost_5_fold_cross_validation_for_regression_op = components.create_graph_component_from_pipeline_func(
        xgboost_5_fold_cross_validation_for_regression,
        output_component_file='component.yaml',
        annotations={
            "author": "Alexey Volkov <alexey.volkov@ark-kun.com>",
            "canonical_location": "https://raw.githubusercontent.com/Ark-kun/pipeline_components/master/components/XGBoost/Cross_validation_for_regression/from_CSV/component.yaml",
        },
    )
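
Once the script above has emitted `component.yaml`, the resulting graph component can be loaded like any other component and dropped into a pipeline. A hedged sketch, assuming the kfp v1 SDK and a CSV input that actually contains the hard-coded `tips` label column used above; the Download component URL is one of the pinned URLs from the index, and the pipeline name is illustrative:

```python
import kfp
from kfp import components, dsl

download_op = components.load_component_from_url(
    'https://raw.githubusercontent.com/kubeflow/pipelines/0795597562e076437a21745e524b5c960b1edb68/components/web/Download/component.yaml'
)
# Produced by create_graph_component_from_pipeline_func above.
xgboost_cv_op = components.load_component_from_file('component.yaml')

@dsl.pipeline(name='xgboost-5-fold-cv')
def cv_pipeline(data_url: str):
    data = download_op(data_url).output
    cv_task = xgboost_cv_op(data=data, label_column=0, num_iterations=100)
    # The graph component surfaces the aggregated outputs:
    # mean_absolute_error, mean_squared_error, root_mean_squared_error, metrics.

if __name__ == '__main__':
    kfp.compiler.Compiler().compile(cv_pipeline, 'cv_pipeline.yaml')
```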

View File

@ -1,276 +0,0 @@
name: Xgboost 5 fold cross validation for regression
inputs:
- {name: data, type: CSV}
- {name: label_column, type: Integer, default: '0', optional: true}
- {name: objective, type: String, default: 'reg:squarederror', optional: true}
- {name: num_iterations, type: Integer, default: '200', optional: true}
outputs:
- {name: mean_absolute_error, type: Float}
- {name: mean_squared_error, type: Float}
- {name: root_mean_squared_error, type: Float}
- {name: metrics, type: JsonObject}
metadata:
annotations:
author: Alexey Volkov <alexey.volkov@ark-kun.com>
canonical_location: 'https://raw.githubusercontent.com/Ark-kun/pipeline_components/master/components/XGBoost/Cross_validation_for_regression/from_CSV/component.yaml'
implementation:
graph:
tasks:
Split table into folds:
componentRef: {digest: 9956223bcecc7294ca1afac39b60ada4a935a571d817c3dfbf2ea4a211afe3d1,
url: 'https://raw.githubusercontent.com/kubeflow/pipelines/e9b4b29b22a5120daf95b581b0392cd461a906f0/components/dataset_manipulation/split_data_into_folds/in_CSV/component.yaml'}
arguments:
table:
graphInput: {inputName: data}
Xgboost train:
componentRef: {digest: 09b80053da29f8f51575b42e5d2e8ad4b7bdcc92a02c3744e189b1f597006b38,
url: 'https://raw.githubusercontent.com/kubeflow/pipelines/567c04c51ff00a1ee525b3458425b17adbe3df61/components/XGBoost/Train/component.yaml'}
arguments:
training_data:
taskOutput: {outputName: train_1, taskId: Split table into folds, type: CSV}
label_column:
graphInput: {inputName: label_column}
num_iterations:
graphInput: {inputName: num_iterations}
objective:
graphInput: {inputName: objective}
Xgboost predict:
componentRef: {digest: ecdfaf32cff15b6abc3d0dd80365ce00577f1a19a058fbe201f515431cea1357,
url: 'https://raw.githubusercontent.com/kubeflow/pipelines/567c04c51ff00a1ee525b3458425b17adbe3df61/components/XGBoost/Predict/component.yaml'}
arguments:
data:
taskOutput: {outputName: test_1, taskId: Split table into folds, type: CSV}
model:
taskOutput: {outputName: model, taskId: Xgboost train, type: XGBoostModel}
label_column:
graphInput: {inputName: label_column}
Pandas Transform DataFrame in CSV format:
componentRef: {digest: 58dc88349157bf128021708c316ce4eb60bc1de0a5a7dd3af45fabac3276d510,
url: 'https://raw.githubusercontent.com/kubeflow/pipelines/6162d55998b176b50267d351241100bb0ee715bc/components/pandas/Transform_DataFrame/in_CSV_format/component.yaml'}
arguments:
table:
taskOutput: {outputName: test_1, taskId: Split table into folds, type: CSV}
transform_code: df = df[["tips"]]
Remove header:
componentRef: {digest: ba35ffea863855b956c3c50aefa0420ba3823949a6c059e6e3971cde960dc5a3,
url: 'https://raw.githubusercontent.com/kubeflow/pipelines/02c9638287468c849632cf9f7885b51de4c66f86/components/tables/Remove_header/component.yaml'}
arguments:
table:
taskOutput: {outputName: transformed_table, taskId: Pandas Transform DataFrame
in CSV format, type: CSV}
Calculate regression metrics from csv:
componentRef: {digest: e3ecbfeb18032820edfee4255e2fb6d15d15ed224e166519d5e528e12053a995,
url: 'https://raw.githubusercontent.com/kubeflow/pipelines/7da1ac9464b4b3e7d95919faa2f1107a9635b7e4/components/ml_metrics/Calculate_regression_metrics/from_CSV/component.yaml'}
arguments:
true_values:
taskOutput: {outputName: table, taskId: Remove header}
predicted_values:
taskOutput: {outputName: predictions, taskId: Xgboost predict, type: Text}
Xgboost train 2:
componentRef: {digest: 09b80053da29f8f51575b42e5d2e8ad4b7bdcc92a02c3744e189b1f597006b38,
url: 'https://raw.githubusercontent.com/kubeflow/pipelines/567c04c51ff00a1ee525b3458425b17adbe3df61/components/XGBoost/Train/component.yaml'}
arguments:
training_data:
taskOutput: {outputName: train_2, taskId: Split table into folds, type: CSV}
label_column:
graphInput: {inputName: label_column}
num_iterations:
graphInput: {inputName: num_iterations}
objective:
graphInput: {inputName: objective}
Xgboost predict 2:
componentRef: {digest: ecdfaf32cff15b6abc3d0dd80365ce00577f1a19a058fbe201f515431cea1357,
url: 'https://raw.githubusercontent.com/kubeflow/pipelines/567c04c51ff00a1ee525b3458425b17adbe3df61/components/XGBoost/Predict/component.yaml'}
arguments:
data:
taskOutput: {outputName: test_2, taskId: Split table into folds, type: CSV}
model:
taskOutput: {outputName: model, taskId: Xgboost train 2, type: XGBoostModel}
label_column:
graphInput: {inputName: label_column}
Pandas Transform DataFrame in CSV format 2:
componentRef: {digest: 58dc88349157bf128021708c316ce4eb60bc1de0a5a7dd3af45fabac3276d510,
url: 'https://raw.githubusercontent.com/kubeflow/pipelines/6162d55998b176b50267d351241100bb0ee715bc/components/pandas/Transform_DataFrame/in_CSV_format/component.yaml'}
arguments:
table:
taskOutput: {outputName: test_2, taskId: Split table into folds, type: CSV}
transform_code: df = df[["tips"]]
Remove header 2:
componentRef: {digest: ba35ffea863855b956c3c50aefa0420ba3823949a6c059e6e3971cde960dc5a3,
url: 'https://raw.githubusercontent.com/kubeflow/pipelines/02c9638287468c849632cf9f7885b51de4c66f86/components/tables/Remove_header/component.yaml'}
arguments:
table:
taskOutput: {outputName: transformed_table, taskId: Pandas Transform DataFrame
in CSV format 2, type: CSV}
Calculate regression metrics from csv 2:
componentRef: {digest: e3ecbfeb18032820edfee4255e2fb6d15d15ed224e166519d5e528e12053a995,
url: 'https://raw.githubusercontent.com/kubeflow/pipelines/7da1ac9464b4b3e7d95919faa2f1107a9635b7e4/components/ml_metrics/Calculate_regression_metrics/from_CSV/component.yaml'}
arguments:
true_values:
taskOutput: {outputName: table, taskId: Remove header 2}
predicted_values:
taskOutput: {outputName: predictions, taskId: Xgboost predict 2, type: Text}
Xgboost train 3:
componentRef: {digest: 09b80053da29f8f51575b42e5d2e8ad4b7bdcc92a02c3744e189b1f597006b38,
url: 'https://raw.githubusercontent.com/kubeflow/pipelines/567c04c51ff00a1ee525b3458425b17adbe3df61/components/XGBoost/Train/component.yaml'}
arguments:
training_data:
taskOutput: {outputName: train_3, taskId: Split table into folds, type: CSV}
label_column:
graphInput: {inputName: label_column}
num_iterations:
graphInput: {inputName: num_iterations}
objective:
graphInput: {inputName: objective}
Xgboost predict 3:
componentRef: {digest: ecdfaf32cff15b6abc3d0dd80365ce00577f1a19a058fbe201f515431cea1357,
url: 'https://raw.githubusercontent.com/kubeflow/pipelines/567c04c51ff00a1ee525b3458425b17adbe3df61/components/XGBoost/Predict/component.yaml'}
arguments:
data:
taskOutput: {outputName: test_3, taskId: Split table into folds, type: CSV}
model:
taskOutput: {outputName: model, taskId: Xgboost train 3, type: XGBoostModel}
label_column:
graphInput: {inputName: label_column}
Pandas Transform DataFrame in CSV format 3:
componentRef: {digest: 58dc88349157bf128021708c316ce4eb60bc1de0a5a7dd3af45fabac3276d510,
url: 'https://raw.githubusercontent.com/kubeflow/pipelines/6162d55998b176b50267d351241100bb0ee715bc/components/pandas/Transform_DataFrame/in_CSV_format/component.yaml'}
arguments:
table:
taskOutput: {outputName: test_3, taskId: Split table into folds, type: CSV}
transform_code: df = df[["tips"]]
Remove header 3:
componentRef: {digest: ba35ffea863855b956c3c50aefa0420ba3823949a6c059e6e3971cde960dc5a3,
url: 'https://raw.githubusercontent.com/kubeflow/pipelines/02c9638287468c849632cf9f7885b51de4c66f86/components/tables/Remove_header/component.yaml'}
arguments:
table:
taskOutput: {outputName: transformed_table, taskId: Pandas Transform DataFrame
in CSV format 3, type: CSV}
Calculate regression metrics from csv 3:
componentRef: {digest: e3ecbfeb18032820edfee4255e2fb6d15d15ed224e166519d5e528e12053a995,
url: 'https://raw.githubusercontent.com/kubeflow/pipelines/7da1ac9464b4b3e7d95919faa2f1107a9635b7e4/components/ml_metrics/Calculate_regression_metrics/from_CSV/component.yaml'}
arguments:
true_values:
taskOutput: {outputName: table, taskId: Remove header 3}
predicted_values:
taskOutput: {outputName: predictions, taskId: Xgboost predict 3, type: Text}
Xgboost train 4:
componentRef: {digest: 09b80053da29f8f51575b42e5d2e8ad4b7bdcc92a02c3744e189b1f597006b38,
url: 'https://raw.githubusercontent.com/kubeflow/pipelines/567c04c51ff00a1ee525b3458425b17adbe3df61/components/XGBoost/Train/component.yaml'}
arguments:
training_data:
taskOutput: {outputName: train_4, taskId: Split table into folds, type: CSV}
label_column:
graphInput: {inputName: label_column}
num_iterations:
graphInput: {inputName: num_iterations}
objective:
graphInput: {inputName: objective}
Xgboost predict 4:
componentRef: {digest: ecdfaf32cff15b6abc3d0dd80365ce00577f1a19a058fbe201f515431cea1357,
url: 'https://raw.githubusercontent.com/kubeflow/pipelines/567c04c51ff00a1ee525b3458425b17adbe3df61/components/XGBoost/Predict/component.yaml'}
arguments:
data:
taskOutput: {outputName: test_4, taskId: Split table into folds, type: CSV}
model:
taskOutput: {outputName: model, taskId: Xgboost train 4, type: XGBoostModel}
label_column:
graphInput: {inputName: label_column}
Pandas Transform DataFrame in CSV format 4:
componentRef: {digest: 58dc88349157bf128021708c316ce4eb60bc1de0a5a7dd3af45fabac3276d510,
url: 'https://raw.githubusercontent.com/kubeflow/pipelines/6162d55998b176b50267d351241100bb0ee715bc/components/pandas/Transform_DataFrame/in_CSV_format/component.yaml'}
arguments:
table:
taskOutput: {outputName: test_4, taskId: Split table into folds, type: CSV}
transform_code: df = df[["tips"]]
Remove header 4:
componentRef: {digest: ba35ffea863855b956c3c50aefa0420ba3823949a6c059e6e3971cde960dc5a3,
url: 'https://raw.githubusercontent.com/kubeflow/pipelines/02c9638287468c849632cf9f7885b51de4c66f86/components/tables/Remove_header/component.yaml'}
arguments:
table:
taskOutput: {outputName: transformed_table, taskId: Pandas Transform DataFrame
in CSV format 4, type: CSV}
Calculate regression metrics from csv 4:
componentRef: {digest: e3ecbfeb18032820edfee4255e2fb6d15d15ed224e166519d5e528e12053a995,
url: 'https://raw.githubusercontent.com/kubeflow/pipelines/7da1ac9464b4b3e7d95919faa2f1107a9635b7e4/components/ml_metrics/Calculate_regression_metrics/from_CSV/component.yaml'}
arguments:
true_values:
taskOutput: {outputName: table, taskId: Remove header 4}
predicted_values:
taskOutput: {outputName: predictions, taskId: Xgboost predict 4, type: Text}
Xgboost train 5:
componentRef: {digest: 09b80053da29f8f51575b42e5d2e8ad4b7bdcc92a02c3744e189b1f597006b38,
url: 'https://raw.githubusercontent.com/kubeflow/pipelines/567c04c51ff00a1ee525b3458425b17adbe3df61/components/XGBoost/Train/component.yaml'}
arguments:
training_data:
taskOutput: {outputName: train_5, taskId: Split table into folds, type: CSV}
label_column:
graphInput: {inputName: label_column}
num_iterations:
graphInput: {inputName: num_iterations}
objective:
graphInput: {inputName: objective}
Xgboost predict 5:
componentRef: {digest: ecdfaf32cff15b6abc3d0dd80365ce00577f1a19a058fbe201f515431cea1357,
url: 'https://raw.githubusercontent.com/kubeflow/pipelines/567c04c51ff00a1ee525b3458425b17adbe3df61/components/XGBoost/Predict/component.yaml'}
arguments:
data:
taskOutput: {outputName: test_5, taskId: Split table into folds, type: CSV}
model:
taskOutput: {outputName: model, taskId: Xgboost train 5, type: XGBoostModel}
label_column:
graphInput: {inputName: label_column}
Pandas Transform DataFrame in CSV format 5:
componentRef: {digest: 58dc88349157bf128021708c316ce4eb60bc1de0a5a7dd3af45fabac3276d510,
url: 'https://raw.githubusercontent.com/kubeflow/pipelines/6162d55998b176b50267d351241100bb0ee715bc/components/pandas/Transform_DataFrame/in_CSV_format/component.yaml'}
arguments:
table:
taskOutput: {outputName: test_5, taskId: Split table into folds, type: CSV}
transform_code: df = df[["tips"]]
Remove header 5:
componentRef: {digest: ba35ffea863855b956c3c50aefa0420ba3823949a6c059e6e3971cde960dc5a3,
url: 'https://raw.githubusercontent.com/kubeflow/pipelines/02c9638287468c849632cf9f7885b51de4c66f86/components/tables/Remove_header/component.yaml'}
arguments:
table:
taskOutput: {outputName: transformed_table, taskId: Pandas Transform DataFrame
in CSV format 5, type: CSV}
Calculate regression metrics from csv 5:
componentRef: {digest: e3ecbfeb18032820edfee4255e2fb6d15d15ed224e166519d5e528e12053a995,
url: 'https://raw.githubusercontent.com/kubeflow/pipelines/7da1ac9464b4b3e7d95919faa2f1107a9635b7e4/components/ml_metrics/Calculate_regression_metrics/from_CSV/component.yaml'}
arguments:
true_values:
taskOutput: {outputName: table, taskId: Remove header 5}
predicted_values:
taskOutput: {outputName: predictions, taskId: Xgboost predict 5, type: Text}
Aggregate regression metrics from csv:
componentRef: {digest: 3e128130521eff8d43764f3dcb037316cdd6490ad2878df5adef416f7c2f3c19,
url: 'https://raw.githubusercontent.com/kubeflow/pipelines/7ea9363fe201918d419fecdc00d1275e657ff712/components/ml_metrics/Aggregate_regression_metrics/component.yaml'}
arguments:
metrics_1:
taskOutput: {outputName: metrics, taskId: Calculate regression metrics
from csv, type: JsonObject}
metrics_2:
taskOutput: {outputName: metrics, taskId: Calculate regression metrics
from csv 2, type: JsonObject}
metrics_3:
taskOutput: {outputName: metrics, taskId: Calculate regression metrics
from csv 3, type: JsonObject}
metrics_4:
taskOutput: {outputName: metrics, taskId: Calculate regression metrics
from csv 4, type: JsonObject}
metrics_5:
taskOutput: {outputName: metrics, taskId: Calculate regression metrics
from csv 5, type: JsonObject}
outputValues:
mean_absolute_error:
taskOutput: {outputName: mean_absolute_error, taskId: Aggregate regression
metrics from csv, type: Float}
mean_squared_error:
taskOutput: {outputName: mean_squared_error, taskId: Aggregate regression
metrics from csv, type: Float}
root_mean_squared_error:
taskOutput: {outputName: root_mean_squared_error, taskId: Aggregate regression
metrics from csv, type: Float}
metrics:
taskOutput: {outputName: metrics, taskId: Aggregate regression metrics from
csv, type: JsonObject}

View File

@ -1,58 +0,0 @@
from kfp.components import InputPath, OutputPath, create_component_from_func


def xgboost_predict(
    data_path: InputPath('CSV'),  # Also supports LibSVM
    model_path: InputPath('XGBoostModel'),
    predictions_path: OutputPath('Predictions'),
    label_column: int = None,
):
    '''Make predictions using a trained XGBoost model.

    Args:
        data_path: Path for the feature data in CSV format.
        model_path: Path for the trained model in binary XGBoost format.
        predictions_path: Output path for the predictions.
        label_column: Column containing the label data.

    Annotations:
        author: Alexey Volkov <alexey.volkov@ark-kun.com>
    '''
    from pathlib import Path

    import numpy
    import pandas
    import xgboost

    df = pandas.read_csv(
        data_path,
    )

    # Drop the label column (if any) so the model only sees feature columns.
    if label_column is not None:
        df = df.drop(columns=[df.columns[label_column]])

    testing_data = xgboost.DMatrix(
        data=df,
    )

    # Load the trained model and predict.
    model = xgboost.Booster(model_file=model_path)
    predictions = model.predict(testing_data)

    Path(predictions_path).parent.mkdir(parents=True, exist_ok=True)
    numpy.savetxt(predictions_path, predictions)


if __name__ == '__main__':
    create_component_from_func(
        xgboost_predict,
        output_component_file='component.yaml',
        base_image='python:3.7',
        packages_to_install=[
            'xgboost==1.1.1',
            'pandas==1.0.5',
        ],
        annotations={
            "author": "Alexey Volkov <alexey.volkov@ark-kun.com>",
            "canonical_location": "https://raw.githubusercontent.com/Ark-kun/pipeline_components/master/components/XGBoost/Predict/component.yaml",
        },
    )
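
Because the component body is a plain function, it can be exercised locally without KFP. A minimal smoke test under the pinned `xgboost==1.1.1` / `pandas==1.0.5` versions, assuming `xgboost_predict` from the file above is in scope; the tiny dataset and file names are made up for illustration:

```python
import pandas
import xgboost

# Build a tiny labeled dataset and a matching trained model.
df = pandas.DataFrame({'label': [0.0, 1.0, 0.0, 1.0], 'x': [1.0, 2.0, 3.0, 4.0]})
df.to_csv('data.csv', index=False)
dtrain = xgboost.DMatrix(df[['x']], label=df['label'])
xgboost.train({'objective': 'reg:squarederror'}, dtrain, num_boost_round=5).save_model('model.bst')

# Call the component function directly; column 0 is dropped as the label.
xgboost_predict('data.csv', 'model.bst', 'predictions.txt', label_column=0)
print(open('predictions.txt').read())
```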

View File

@ -1,103 +0,0 @@
name: Xgboost predict
description: |-
Make predictions using a trained XGBoost model.
Args:
data_path: Path for the feature data in CSV format.
model_path: Path for the trained model in binary XGBoost format.
predictions_path: Output path for the predictions.
label_column: Column containing the label data.
Annotations:
author: Alexey Volkov <alexey.volkov@ark-kun.com>
inputs:
- {name: data, type: CSV}
- {name: model, type: XGBoostModel}
- {name: label_column, type: Integer, optional: true}
outputs:
- {name: predictions, type: Predictions}
metadata:
annotations:
author: Alexey Volkov <alexey.volkov@ark-kun.com>
canonical_location: 'https://raw.githubusercontent.com/Ark-kun/pipeline_components/master/components/XGBoost/Predict/component.yaml'
implementation:
container:
image: python:3.7
command:
- sh
- -c
- (PIP_DISABLE_PIP_VERSION_CHECK=1 python3 -m pip install --quiet --no-warn-script-location
'xgboost==1.1.1' 'pandas==1.0.5' || PIP_DISABLE_PIP_VERSION_CHECK=1 python3
-m pip install --quiet --no-warn-script-location 'xgboost==1.1.1' 'pandas==1.0.5'
--user) && "$0" "$@"
- python3
- -u
- -c
- |
def _make_parent_dirs_and_return_path(file_path: str):
import os
os.makedirs(os.path.dirname(file_path), exist_ok=True)
return file_path
def xgboost_predict(
data_path, # Also supports LibSVM
model_path,
predictions_path,
label_column = None,
):
'''Make predictions using a trained XGBoost model.
Args:
data_path: Path for the feature data in CSV format.
model_path: Path for the trained model in binary XGBoost format.
predictions_path: Output path for the predictions.
label_column: Column containing the label data.
Annotations:
author: Alexey Volkov <alexey.volkov@ark-kun.com>
'''
from pathlib import Path
import numpy
import pandas
import xgboost
df = pandas.read_csv(
data_path,
)
if label_column is not None:
df = df.drop(columns=[df.columns[label_column]])
testing_data = xgboost.DMatrix(
data=df,
)
model = xgboost.Booster(model_file=model_path)
predictions = model.predict(testing_data)
Path(predictions_path).parent.mkdir(parents=True, exist_ok=True)
numpy.savetxt(predictions_path, predictions)
import argparse
_parser = argparse.ArgumentParser(prog='Xgboost predict', description='Make predictions using a trained XGBoost model.\n\n Args:\n data_path: Path for the feature data in CSV format.\n model_path: Path for the trained model in binary XGBoost format.\n predictions_path: Output path for the predictions.\n label_column: Column containing the label data.\n\n Annotations:\n author: Alexey Volkov <alexey.volkov@ark-kun.com>')
_parser.add_argument("--data", dest="data_path", type=str, required=True, default=argparse.SUPPRESS)
_parser.add_argument("--model", dest="model_path", type=str, required=True, default=argparse.SUPPRESS)
_parser.add_argument("--label-column", dest="label_column", type=int, required=False, default=argparse.SUPPRESS)
_parser.add_argument("--predictions", dest="predictions_path", type=_make_parent_dirs_and_return_path, required=True, default=argparse.SUPPRESS)
_parsed_args = vars(_parser.parse_args())
_outputs = xgboost_predict(**_parsed_args)
args:
- --data
- {inputPath: data}
- --model
- {inputPath: model}
- if:
cond: {isPresent: label_column}
then:
- --label-column
- {inputValue: label_column}
- --predictions
- {outputPath: predictions}
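
Note the `if: {isPresent: label_column}` clause in the `args` above: the `--label-column` flag is appended to the container command only when the optional input is actually supplied. At the SDK level this is just an optional keyword argument. A sketch of wiring this component after its Train counterpart, assuming the YAML above is saved locally as `component.yaml`; the pipeline name is illustrative:

```python
import kfp
from kfp import components, dsl

download_op = components.load_component_from_url(
    'https://raw.githubusercontent.com/kubeflow/pipelines/0795597562e076437a21745e524b5c960b1edb68/components/web/Download/component.yaml'
)
train_op = components.load_component_from_url(
    'https://raw.githubusercontent.com/kubeflow/pipelines/567c04c51ff00a1ee525b3458425b17adbe3df61/components/XGBoost/Train/component.yaml'
)
predict_op = components.load_component_from_file('component.yaml')  # the YAML above

@dsl.pipeline(name='train-then-predict')
def train_predict(data_url: str):
    data = download_op(data_url).output
    model = train_op(training_data=data).outputs['model']
    # Optional input present: the `if: isPresent` clause adds --label-column 0.
    predict_op(data=data, model=model, label_column=0)

if __name__ == '__main__':
    kfp.compiler.Compiler().compile(train_predict, 'train_predict.yaml')
```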

View File

@ -1,58 +0,0 @@
from kfp.components import InputPath, OutputPath, create_component_from_func


def xgboost_predict(
    data_path: InputPath('ApacheParquet'),
    model_path: InputPath('XGBoostModel'),
    predictions_path: OutputPath('Predictions'),
    label_column_name: str = None,
):
    '''Make predictions using a trained XGBoost model.

    Args:
        data_path: Path for the feature data in Apache Parquet format.
        model_path: Path for the trained model in binary XGBoost format.
        predictions_path: Output path for the predictions.
        label_column_name: Optional. Name of the column containing the label data that is excluded during the prediction.

    Annotations:
        author: Alexey Volkov <alexey.volkov@ark-kun.com>
    '''
    from pathlib import Path

    import numpy
    import pandas
    import xgboost

    # Loading data
    df = pandas.read_parquet(data_path)
    if label_column_name:
        df = df.drop(columns=[label_column_name])

    evaluation_data = xgboost.DMatrix(
        data=df,
    )

    # Loading the trained model
    model = xgboost.Booster(model_file=model_path)
    predictions = model.predict(evaluation_data)

    Path(predictions_path).parent.mkdir(parents=True, exist_ok=True)
    numpy.savetxt(predictions_path, predictions)


if __name__ == '__main__':
    create_component_from_func(
        xgboost_predict,
        output_component_file='component.yaml',
        base_image='python:3.7',
        packages_to_install=[
            'xgboost==1.1.1',
            'pandas==1.0.5',
            'pyarrow==0.17.1',
        ],
        annotations={
            "author": "Alexey Volkov <alexey.volkov@ark-kun.com>",
            "canonical_location": "https://raw.githubusercontent.com/Ark-kun/pipeline_components/master/components/XGBoost/Predict/from_ApacheParquet/component.yaml",
        },
    )
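
The Parquet variant differs from the CSV one only in the input format and in selecting the label by name rather than by index. A similar hedged local sketch, assuming `xgboost_predict` from the file above is in scope and `pyarrow` is installed as pinned; the dataset and file names are illustrative:

```python
import pandas
import xgboost

df = pandas.DataFrame({'tips': [0.5, 1.0, 1.5, 2.0], 'distance': [1.0, 2.0, 3.0, 4.0]})
df.to_parquet('data.parquet')  # requires pyarrow

dtrain = xgboost.DMatrix(df[['distance']], label=df['tips'])
xgboost.train({'objective': 'reg:squarederror'}, dtrain, num_boost_round=5).save_model('model.bst')

# The label column is excluded by name before prediction.
xgboost_predict('data.parquet', 'model.bst', 'predictions.txt', label_column_name='tips')
```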

View File

@ -1,102 +0,0 @@
name: Xgboost predict
description: |-
Make predictions using a trained XGBoost model.
Args:
data_path: Path for the feature data in Apache Parquet format.
model_path: Path for the trained model in binary XGBoost format.
predictions_path: Output path for the predictions.
label_column_name: Optional. Name of the column containing the label data that is excluded during the prediction.
Annotations:
author: Alexey Volkov <alexey.volkov@ark-kun.com>
inputs:
- {name: data, type: ApacheParquet}
- {name: model, type: XGBoostModel}
- {name: label_column_name, type: String, optional: true}
outputs:
- {name: predictions, type: Predictions}
metadata:
annotations:
author: Alexey Volkov <alexey.volkov@ark-kun.com>
canonical_location: 'https://raw.githubusercontent.com/Ark-kun/pipeline_components/master/components/XGBoost/Predict/from_ApacheParquet/component.yaml'
implementation:
container:
image: python:3.7
command:
- sh
- -c
- (PIP_DISABLE_PIP_VERSION_CHECK=1 python3 -m pip install --quiet --no-warn-script-location
'xgboost==1.1.1' 'pandas==1.0.5' 'pyarrow==0.17.1' || PIP_DISABLE_PIP_VERSION_CHECK=1
python3 -m pip install --quiet --no-warn-script-location 'xgboost==1.1.1' 'pandas==1.0.5'
'pyarrow==0.17.1' --user) && "$0" "$@"
- python3
- -u
- -c
- |
def _make_parent_dirs_and_return_path(file_path: str):
import os
os.makedirs(os.path.dirname(file_path), exist_ok=True)
return file_path
def xgboost_predict(
data_path,
model_path,
predictions_path,
label_column_name = None,
):
'''Make predictions using a trained XGBoost model.
Args:
data_path: Path for the feature data in Apache Parquet format.
model_path: Path for the trained model in binary XGBoost format.
predictions_path: Output path for the predictions.
label_column_name: Optional. Name of the column containing the label data that is excluded during the prediction.
Annotations:
author: Alexey Volkov <alexey.volkov@ark-kun.com>
'''
from pathlib import Path
import numpy
import pandas
import xgboost
# Loading data
df = pandas.read_parquet(data_path)
if label_column_name:
df = df.drop(columns=[label_column_name])
evaluation_data = xgboost.DMatrix(
data=df,
)
# Loading the trained model
model = xgboost.Booster(model_file=model_path)
predictions = model.predict(evaluation_data)
Path(predictions_path).parent.mkdir(parents=True, exist_ok=True)
numpy.savetxt(predictions_path, predictions)
import argparse
_parser = argparse.ArgumentParser(prog='Xgboost predict', description='Make predictions using a trained XGBoost model.\n\n Args:\n data_path: Path for the feature data in Apache Parquet format.\n model_path: Path for the trained model in binary XGBoost format.\n predictions_path: Output path for the predictions.\n label_column_name: Optional. Name of the column containing the label data that is excluded during the prediction.\n\n Annotations:\n author: Alexey Volkov <alexey.volkov@ark-kun.com>')
_parser.add_argument("--data", dest="data_path", type=str, required=True, default=argparse.SUPPRESS)
_parser.add_argument("--model", dest="model_path", type=str, required=True, default=argparse.SUPPRESS)
_parser.add_argument("--label-column-name", dest="label_column_name", type=str, required=False, default=argparse.SUPPRESS)
_parser.add_argument("--predictions", dest="predictions_path", type=_make_parent_dirs_and_return_path, required=True, default=argparse.SUPPRESS)
_parsed_args = vars(_parser.parse_args())
_outputs = xgboost_predict(**_parsed_args)
args:
- --data
- {inputPath: data}
- --model
- {inputPath: model}
- if:
cond: {isPresent: label_column_name}
then:
- --label-column-name
- {inputValue: label_column_name}
- --predictions
- {outputPath: predictions}
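
For reference, a component definition like the one above is normally consumed from Python with the kfp SDK. A minimal usage sketch, assuming the YAML above has been saved locally as component.yaml (the file path and the pipeline wiring are illustrative, not part of the original component):

from kfp import components

# Load the predict component definition shown above (local path is hypothetical).
xgboost_predict_op = components.load_component_from_file('component.yaml')

def predict_pipeline(data, model):
    # label_column_name is optional: when it is not passed, the
    # `if: {isPresent: label_column_name}` block above omits the
    # --label-column-name flag from the container command entirely.
    return xgboost_predict_op(data=data, model=model)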

@ -1,3 +0,0 @@
# Deprecation Warning
The components in this directory have been moved to [components/contrib/XGBoost](https://github.com/kubeflow/pipelines/tree/master/components/contrib/XGBoost). This directory will be removed by the end of 2021.
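
For pipelines that load these components by URL, only the reference needs to change. A minimal sketch, assuming the Train component keeps the same relative layout under the new contrib tree (the exact file path is an assumption and should be verified against the repository):

from kfp import components

# Hypothetical URL under the new contrib location; verify the exact path.
xgboost_train_op = components.load_component_from_url(
    'https://raw.githubusercontent.com/kubeflow/pipelines/master/components/contrib/XGBoost/Train/component.yaml'
)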

@ -1,94 +0,0 @@
from kfp.components import InputPath, OutputPath, create_component_from_func
def xgboost_train(
training_data_path: InputPath('CSV'), # Also supports LibSVM
model_path: OutputPath('XGBoostModel'),
model_config_path: OutputPath('XGBoostModelConfig'),
starting_model_path: InputPath('XGBoostModel') = None,
label_column: int = 0,
num_iterations: int = 10,
booster_params: dict = None,
# Booster parameters
objective: str = 'reg:squarederror',
booster: str = 'gbtree',
learning_rate: float = 0.3,
min_split_loss: float = 0,
max_depth: int = 6,
):
'''Train an XGBoost model.
Args:
training_data_path: Path for the training data in CSV format.
model_path: Output path for the trained model in binary XGBoost format.
model_config_path: Output path for the internal parameter configuration of Booster as a JSON string.
starting_model_path: Path for the existing trained model to start from.
label_column: Column containing the label data.
num_iterations: Number of boosting iterations.
booster_params: Parameters for the booster. See https://xgboost.readthedocs.io/en/latest/parameter.html
objective: The learning task and the corresponding learning objective.
See https://xgboost.readthedocs.io/en/latest/parameter.html#learning-task-parameters
The most common values are:
"reg:squarederror" - Regression with squared loss (default).
"reg:logistic" - Logistic regression.
"binary:logistic" - Logistic regression for binary classification, output probability.
"binary:logitraw" - Logistic regression for binary classification, output score before logistic transformation
"rank:pairwise" - Use LambdaMART to perform pairwise ranking where the pairwise loss is minimized
"rank:ndcg" - Use LambdaMART to perform list-wise ranking where Normalized Discounted Cumulative Gain (NDCG) is maximized
Annotations:
author: Alexey Volkov <alexey.volkov@ark-kun.com>
'''
import pandas
import xgboost
df = pandas.read_csv(
training_data_path,
)
training_data = xgboost.DMatrix(
data=df.drop(columns=[df.columns[label_column]]),
label=df[df.columns[label_column]],
)
booster_params = booster_params or {}
booster_params.setdefault('objective', objective)
booster_params.setdefault('booster', booster)
booster_params.setdefault('learning_rate', learning_rate)
booster_params.setdefault('min_split_loss', min_split_loss)
booster_params.setdefault('max_depth', max_depth)
starting_model = None
if starting_model_path:
starting_model = xgboost.Booster(model_file=starting_model_path)
model = xgboost.train(
params=booster_params,
dtrain=training_data,
num_boost_round=num_iterations,
xgb_model=starting_model
)
# Saving the model in binary format
model.save_model(model_path)
model_config_str = model.save_config()
with open(model_config_path, 'w') as model_config_file:
model_config_file.write(model_config_str)
if __name__ == '__main__':
create_component_from_func(
xgboost_train,
output_component_file='component.yaml',
base_image='python:3.7',
packages_to_install=[
'xgboost==1.1.1',
'pandas==1.0.5',
],
annotations={
"author": "Alexey Volkov <alexey.volkov@ark-kun.com>",
"canonical_location": "https://raw.githubusercontent.com/Ark-kun/pipeline_components/master/components/XGBoost/Train/component.yaml",
},
)
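
One detail worth noting: because the training function fills booster_params with setdefault, any key already present in the booster_params dict takes precedence over the corresponding convenience argument (objective, booster, learning_rate, min_split_loss, max_depth). A minimal standalone illustration of that merge behavior (plain Python, not part of the component):

# Keys set explicitly in booster_params win over the convenience arguments:
booster_params = {'max_depth': 8}
booster_params.setdefault('max_depth', 6)        # no-op: the key already exists
booster_params.setdefault('learning_rate', 0.3)  # filled in from the default
assert booster_params == {'max_depth': 8, 'learning_rate': 0.3}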

@ -1,208 +0,0 @@
name: Xgboost train
description: |-
Train an XGBoost model.
Args:
training_data_path: Path for the training data in CSV format.
model_path: Output path for the trained model in binary XGBoost format.
model_config_path: Output path for the internal parameter configuration of Booster as a JSON string.
starting_model_path: Path for the existing trained model to start from.
label_column: Column containing the label data.
num_iterations: Number of boosting iterations.
booster_params: Parameters for the booster. See https://xgboost.readthedocs.io/en/latest/parameter.html
objective: The learning task and the corresponding learning objective.
See https://xgboost.readthedocs.io/en/latest/parameter.html#learning-task-parameters
The most common values are:
"reg:squarederror" - Regression with squared loss (default).
"reg:logistic" - Logistic regression.
"binary:logistic" - Logistic regression for binary classification, output probability.
"binary:logitraw" - Logistic regression for binary classification, output score before logistic transformation
"rank:pairwise" - Use LambdaMART to perform pairwise ranking where the pairwise loss is minimized
"rank:ndcg" - Use LambdaMART to perform list-wise ranking where Normalized Discounted Cumulative Gain (NDCG) is maximized
Annotations:
author: Alexey Volkov <alexey.volkov@ark-kun.com>
inputs:
- {name: training_data, type: CSV}
- {name: starting_model, type: XGBoostModel, optional: true}
- {name: label_column, type: Integer, default: '0', optional: true}
- {name: num_iterations, type: Integer, default: '10', optional: true}
- {name: booster_params, type: JsonObject, optional: true}
- {name: objective, type: String, default: 'reg:squarederror', optional: true}
- {name: booster, type: String, default: gbtree, optional: true}
- {name: learning_rate, type: Float, default: '0.3', optional: true}
- {name: min_split_loss, type: Float, default: '0', optional: true}
- {name: max_depth, type: Integer, default: '6', optional: true}
outputs:
- {name: model, type: XGBoostModel}
- {name: model_config, type: XGBoostModelConfig}
metadata:
annotations:
author: Alexey Volkov <alexey.volkov@ark-kun.com>
canonical_location: 'https://raw.githubusercontent.com/Ark-kun/pipeline_components/master/components/XGBoost/Train/component.yaml'
implementation:
container:
image: python:3.7
command:
- sh
- -c
- (PIP_DISABLE_PIP_VERSION_CHECK=1 python3 -m pip install --quiet --no-warn-script-location
'xgboost==1.1.1' 'pandas==1.0.5' || PIP_DISABLE_PIP_VERSION_CHECK=1 python3
-m pip install --quiet --no-warn-script-location 'xgboost==1.1.1' 'pandas==1.0.5'
--user) && "$0" "$@"
- python3
- -u
- -c
- |
def _make_parent_dirs_and_return_path(file_path: str):
import os
os.makedirs(os.path.dirname(file_path), exist_ok=True)
return file_path
def xgboost_train(
training_data_path, # Also supports LibSVM
model_path,
model_config_path,
starting_model_path = None,
label_column = 0,
num_iterations = 10,
booster_params = None,
# Booster parameters
objective = 'reg:squarederror',
booster = 'gbtree',
learning_rate = 0.3,
min_split_loss = 0,
max_depth = 6,
):
'''Train an XGBoost model.
Args:
training_data_path: Path for the training data in CSV format.
model_path: Output path for the trained model in binary XGBoost format.
model_config_path: Output path for the internal parameter configuration of Booster as a JSON string.
starting_model_path: Path for the existing trained model to start from.
label_column: Column containing the label data.
num_iterations: Number of boosting iterations.
booster_params: Parameters for the booster. See https://xgboost.readthedocs.io/en/latest/parameter.html
objective: The learning task and the corresponding learning objective.
See https://xgboost.readthedocs.io/en/latest/parameter.html#learning-task-parameters
The most common values are:
"reg:squarederror" - Regression with squared loss (default).
"reg:logistic" - Logistic regression.
"binary:logistic" - Logistic regression for binary classification, output probability.
"binary:logitraw" - Logistic regression for binary classification, output score before logistic transformation
"rank:pairwise" - Use LambdaMART to perform pairwise ranking where the pairwise loss is minimized
"rank:ndcg" - Use LambdaMART to perform list-wise ranking where Normalized Discounted Cumulative Gain (NDCG) is maximized
Annotations:
author: Alexey Volkov <alexey.volkov@ark-kun.com>
'''
import pandas
import xgboost
df = pandas.read_csv(
training_data_path,
)
training_data = xgboost.DMatrix(
data=df.drop(columns=[df.columns[label_column]]),
label=df[df.columns[label_column]],
)
booster_params = booster_params or {}
booster_params.setdefault('objective', objective)
booster_params.setdefault('booster', booster)
booster_params.setdefault('learning_rate', learning_rate)
booster_params.setdefault('min_split_loss', min_split_loss)
booster_params.setdefault('max_depth', max_depth)
starting_model = None
if starting_model_path:
starting_model = xgboost.Booster(model_file=starting_model_path)
model = xgboost.train(
params=booster_params,
dtrain=training_data,
num_boost_round=num_iterations,
xgb_model=starting_model
)
# Saving the model in binary format
model.save_model(model_path)
model_config_str = model.save_config()
with open(model_config_path, 'w') as model_config_file:
model_config_file.write(model_config_str)
import json
import argparse
_parser = argparse.ArgumentParser(prog='Xgboost train', description='Train an XGBoost model.\n\n    Args:\n        training_data_path: Path for the training data in CSV format.\n        model_path: Output path for the trained model in binary XGBoost format.\n        model_config_path: Output path for the internal parameter configuration of Booster as a JSON string.\n        starting_model_path: Path for the existing trained model to start from.\n        label_column: Column containing the label data.\n        num_iterations: Number of boosting iterations.\n        booster_params: Parameters for the booster. See https://xgboost.readthedocs.io/en/latest/parameter.html\n        objective: The learning task and the corresponding learning objective.\n            See https://xgboost.readthedocs.io/en/latest/parameter.html#learning-task-parameters\n            The most common values are:\n            "reg:squarederror" - Regression with squared loss (default).\n            "reg:logistic" - Logistic regression.\n            "binary:logistic" - Logistic regression for binary classification, output probability.\n            "binary:logitraw" - Logistic regression for binary classification, output score before logistic transformation\n            "rank:pairwise" - Use LambdaMART to perform pairwise ranking where the pairwise loss is minimized\n            "rank:ndcg" - Use LambdaMART to perform list-wise ranking where Normalized Discounted Cumulative Gain (NDCG) is maximized\n\n    Annotations:\n        author: Alexey Volkov <alexey.volkov@ark-kun.com>')
_parser.add_argument("--training-data", dest="training_data_path", type=str, required=True, default=argparse.SUPPRESS)
_parser.add_argument("--starting-model", dest="starting_model_path", type=str, required=False, default=argparse.SUPPRESS)
_parser.add_argument("--label-column", dest="label_column", type=int, required=False, default=argparse.SUPPRESS)
_parser.add_argument("--num-iterations", dest="num_iterations", type=int, required=False, default=argparse.SUPPRESS)
_parser.add_argument("--booster-params", dest="booster_params", type=json.loads, required=False, default=argparse.SUPPRESS)
_parser.add_argument("--objective", dest="objective", type=str, required=False, default=argparse.SUPPRESS)
_parser.add_argument("--booster", dest="booster", type=str, required=False, default=argparse.SUPPRESS)
_parser.add_argument("--learning-rate", dest="learning_rate", type=float, required=False, default=argparse.SUPPRESS)
_parser.add_argument("--min-split-loss", dest="min_split_loss", type=float, required=False, default=argparse.SUPPRESS)
_parser.add_argument("--max-depth", dest="max_depth", type=int, required=False, default=argparse.SUPPRESS)
_parser.add_argument("--model", dest="model_path", type=_make_parent_dirs_and_return_path, required=True, default=argparse.SUPPRESS)
_parser.add_argument("--model-config", dest="model_config_path", type=_make_parent_dirs_and_return_path, required=True, default=argparse.SUPPRESS)
_parsed_args = vars(_parser.parse_args())
_outputs = xgboost_train(**_parsed_args)
args:
- --training-data
- {inputPath: training_data}
- if:
cond: {isPresent: starting_model}
then:
- --starting-model
- {inputPath: starting_model}
- if:
cond: {isPresent: label_column}
then:
- --label-column
- {inputValue: label_column}
- if:
cond: {isPresent: num_iterations}
then:
- --num-iterations
- {inputValue: num_iterations}
- if:
cond: {isPresent: booster_params}
then:
- --booster-params
- {inputValue: booster_params}
- if:
cond: {isPresent: objective}
then:
- --objective
- {inputValue: objective}
- if:
cond: {isPresent: booster}
then:
- --booster
- {inputValue: booster}
- if:
cond: {isPresent: learning_rate}
then:
- --learning-rate
- {inputValue: learning_rate}
- if:
cond: {isPresent: min_split_loss}
then:
- --min-split-loss
- {inputValue: min_split_loss}
- if:
cond: {isPresent: max_depth}
then:
- --max-depth
- {inputValue: max_depth}
- --model
- {outputPath: model}
- --model-config
- {outputPath: model_config}

@ -1,94 +0,0 @@
from kfp.components import InputPath, OutputPath, create_component_from_func
def xgboost_train(
training_data_path: InputPath('ApacheParquet'),
model_path: OutputPath('XGBoostModel'),
model_config_path: OutputPath('XGBoostModelConfig'),
label_column_name: str,
starting_model_path: InputPath('XGBoostModel') = None,
num_iterations: int = 10,
booster_params: dict = None,
# Booster parameters
objective: str = 'reg:squarederror',
booster: str = 'gbtree',
learning_rate: float = 0.3,
min_split_loss: float = 0,
max_depth: int = 6,
):
'''Train an XGBoost model.
Args:
training_data_path: Path for the training data in Apache Parquet format.
model_path: Output path for the trained model in binary XGBoost format.
model_config_path: Output path for the internal parameter configuration of Booster as a JSON string.
starting_model_path: Path for the existing trained model to start from.
label_column_name: Name of the column containing the label data.
num_iterations: Number of boosting iterations.
booster_params: Parameters for the booster. See https://xgboost.readthedocs.io/en/latest/parameter.html
objective: The learning task and the corresponding learning objective.
See https://xgboost.readthedocs.io/en/latest/parameter.html#learning-task-parameters
The most common values are:
"reg:squarederror" - Regression with squared loss (default).
"reg:logistic" - Logistic regression.
"binary:logistic" - Logistic regression for binary classification, output probability.
"binary:logitraw" - Logistic regression for binary classification, output score before logistic transformation
"rank:pairwise" - Use LambdaMART to perform pairwise ranking where the pairwise loss is minimized
"rank:ndcg" - Use LambdaMART to perform list-wise ranking where Normalized Discounted Cumulative Gain (NDCG) is maximized
Annotations:
author: Alexey Volkov <alexey.volkov@ark-kun.com>
'''
import pandas
import xgboost
# Loading data
df = pandas.read_parquet(training_data_path)
training_data = xgboost.DMatrix(
data=df.drop(columns=[label_column_name]),
label=df[[label_column_name]],
)
# Training
booster_params = booster_params or {}
booster_params.setdefault('objective', objective)
booster_params.setdefault('booster', booster)
booster_params.setdefault('learning_rate', learning_rate)
booster_params.setdefault('min_split_loss', min_split_loss)
booster_params.setdefault('max_depth', max_depth)
starting_model = None
if starting_model_path:
starting_model = xgboost.Booster(model_file=starting_model_path)
model = xgboost.train(
params=booster_params,
dtrain=training_data,
num_boost_round=num_iterations,
xgb_model=starting_model
)
# Saving the model in binary format
model.save_model(model_path)
model_config_str = model.save_config()
with open(model_config_path, 'w') as model_config_file:
model_config_file.write(model_config_str)
if __name__ == '__main__':
create_component_from_func(
xgboost_train,
output_component_file='component.yaml',
base_image='python:3.7',
packages_to_install=[
'xgboost==1.1.1',
'pandas==1.0.5',
'pyarrow==0.17.1',
],
annotations={
"author": "Alexey Volkov <alexey.volkov@ark-kun.com>",
"canonical_location": "https://raw.githubusercontent.com/Ark-kun/pipeline_components/master/components/XGBoost/Train/from_ApacheParquet/component.yaml",
},
)

@ -1,204 +0,0 @@
name: Xgboost train
description: |-
Train an XGBoost model.
Args:
training_data_path: Path for the training data in Apache Parquet format.
model_path: Output path for the trained model in binary XGBoost format.
model_config_path: Output path for the internal parameter configuration of Booster as a JSON string.
starting_model_path: Path for the existing trained model to start from.
label_column_name: Name of the column containing the label data.
num_iterations: Number of boosting iterations.
booster_params: Parameters for the booster. See https://xgboost.readthedocs.io/en/latest/parameter.html
objective: The learning task and the corresponding learning objective.
See https://xgboost.readthedocs.io/en/latest/parameter.html#learning-task-parameters
The most common values are:
"reg:squarederror" - Regression with squared loss (default).
"reg:logistic" - Logistic regression.
"binary:logistic" - Logistic regression for binary classification, output probability.
"binary:logitraw" - Logistic regression for binary classification, output score before logistic transformation
"rank:pairwise" - Use LambdaMART to perform pairwise ranking where the pairwise loss is minimized
"rank:ndcg" - Use LambdaMART to perform list-wise ranking where Normalized Discounted Cumulative Gain (NDCG) is maximized
Annotations:
author: Alexey Volkov <alexey.volkov@ark-kun.com>
inputs:
- {name: training_data, type: ApacheParquet}
- {name: label_column_name, type: String}
- {name: starting_model, type: XGBoostModel, optional: true}
- {name: num_iterations, type: Integer, default: '10', optional: true}
- {name: booster_params, type: JsonObject, optional: true}
- {name: objective, type: String, default: 'reg:squarederror', optional: true}
- {name: booster, type: String, default: gbtree, optional: true}
- {name: learning_rate, type: Float, default: '0.3', optional: true}
- {name: min_split_loss, type: Float, default: '0', optional: true}
- {name: max_depth, type: Integer, default: '6', optional: true}
outputs:
- {name: model, type: XGBoostModel}
- {name: model_config, type: XGBoostModelConfig}
metadata:
annotations:
author: Alexey Volkov <alexey.volkov@ark-kun.com>
canonical_location: 'https://raw.githubusercontent.com/Ark-kun/pipeline_components/master/components/XGBoost/Train/from_ApacheParquet/component.yaml'
implementation:
container:
image: python:3.7
command:
- sh
- -c
- (PIP_DISABLE_PIP_VERSION_CHECK=1 python3 -m pip install --quiet --no-warn-script-location
'xgboost==1.1.1' 'pandas==1.0.5' 'pyarrow==0.17.1' || PIP_DISABLE_PIP_VERSION_CHECK=1
python3 -m pip install --quiet --no-warn-script-location 'xgboost==1.1.1' 'pandas==1.0.5'
'pyarrow==0.17.1' --user) && "$0" "$@"
- python3
- -u
- -c
- |
def _make_parent_dirs_and_return_path(file_path: str):
import os
os.makedirs(os.path.dirname(file_path), exist_ok=True)
return file_path
def xgboost_train(
training_data_path,
model_path,
model_config_path,
label_column_name,
starting_model_path = None,
num_iterations = 10,
booster_params = None,
# Booster parameters
objective = 'reg:squarederror',
booster = 'gbtree',
learning_rate = 0.3,
min_split_loss = 0,
max_depth = 6,
):
'''Train an XGBoost model.
Args:
training_data_path: Path for the training data in Apache Parquet format.
model_path: Output path for the trained model in binary XGBoost format.
model_config_path: Output path for the internal parameter configuration of Booster as a JSON string.
starting_model_path: Path for the existing trained model to start from.
label_column_name: Name of the column containing the label data.
num_iterations: Number of boosting iterations.
booster_params: Parameters for the booster. See https://xgboost.readthedocs.io/en/latest/parameter.html
objective: The learning task and the corresponding learning objective.
See https://xgboost.readthedocs.io/en/latest/parameter.html#learning-task-parameters
The most common values are:
"reg:squarederror" - Regression with squared loss (default).
"reg:logistic" - Logistic regression.
"binary:logistic" - Logistic regression for binary classification, output probability.
"binary:logitraw" - Logistic regression for binary classification, output score before logistic transformation
"rank:pairwise" - Use LambdaMART to perform pairwise ranking where the pairwise loss is minimized
"rank:ndcg" - Use LambdaMART to perform list-wise ranking where Normalized Discounted Cumulative Gain (NDCG) is maximized
Annotations:
author: Alexey Volkov <alexey.volkov@ark-kun.com>
'''
import pandas
import xgboost
# Loading data
df = pandas.read_parquet(training_data_path)
training_data = xgboost.DMatrix(
data=df.drop(columns=[label_column_name]),
label=df[[label_column_name]],
)
# Training
booster_params = booster_params or {}
booster_params.setdefault('objective', objective)
booster_params.setdefault('booster', booster)
booster_params.setdefault('learning_rate', learning_rate)
booster_params.setdefault('min_split_loss', min_split_loss)
booster_params.setdefault('max_depth', max_depth)
starting_model = None
if starting_model_path:
starting_model = xgboost.Booster(model_file=starting_model_path)
model = xgboost.train(
params=booster_params,
dtrain=training_data,
num_boost_round=num_iterations,
xgb_model=starting_model
)
# Saving the model in binary format
model.save_model(model_path)
model_config_str = model.save_config()
with open(model_config_path, 'w') as model_config_file:
model_config_file.write(model_config_str)
import json
import argparse
_parser = argparse.ArgumentParser(prog='Xgboost train', description='Train an XGBoost model.\n\n    Args:\n        training_data_path: Path for the training data in Apache Parquet format.\n        model_path: Output path for the trained model in binary XGBoost format.\n        model_config_path: Output path for the internal parameter configuration of Booster as a JSON string.\n        starting_model_path: Path for the existing trained model to start from.\n        label_column_name: Name of the column containing the label data.\n        num_iterations: Number of boosting iterations.\n        booster_params: Parameters for the booster. See https://xgboost.readthedocs.io/en/latest/parameter.html\n        objective: The learning task and the corresponding learning objective.\n            See https://xgboost.readthedocs.io/en/latest/parameter.html#learning-task-parameters\n            The most common values are:\n            "reg:squarederror" - Regression with squared loss (default).\n            "reg:logistic" - Logistic regression.\n            "binary:logistic" - Logistic regression for binary classification, output probability.\n            "binary:logitraw" - Logistic regression for binary classification, output score before logistic transformation\n            "rank:pairwise" - Use LambdaMART to perform pairwise ranking where the pairwise loss is minimized\n            "rank:ndcg" - Use LambdaMART to perform list-wise ranking where Normalized Discounted Cumulative Gain (NDCG) is maximized\n\n    Annotations:\n        author: Alexey Volkov <alexey.volkov@ark-kun.com>')
_parser.add_argument("--training-data", dest="training_data_path", type=str, required=True, default=argparse.SUPPRESS)
_parser.add_argument("--label-column-name", dest="label_column_name", type=str, required=True, default=argparse.SUPPRESS)
_parser.add_argument("--starting-model", dest="starting_model_path", type=str, required=False, default=argparse.SUPPRESS)
_parser.add_argument("--num-iterations", dest="num_iterations", type=int, required=False, default=argparse.SUPPRESS)
_parser.add_argument("--booster-params", dest="booster_params", type=json.loads, required=False, default=argparse.SUPPRESS)
_parser.add_argument("--objective", dest="objective", type=str, required=False, default=argparse.SUPPRESS)
_parser.add_argument("--booster", dest="booster", type=str, required=False, default=argparse.SUPPRESS)
_parser.add_argument("--learning-rate", dest="learning_rate", type=float, required=False, default=argparse.SUPPRESS)
_parser.add_argument("--min-split-loss", dest="min_split_loss", type=float, required=False, default=argparse.SUPPRESS)
_parser.add_argument("--max-depth", dest="max_depth", type=int, required=False, default=argparse.SUPPRESS)
_parser.add_argument("--model", dest="model_path", type=_make_parent_dirs_and_return_path, required=True, default=argparse.SUPPRESS)
_parser.add_argument("--model-config", dest="model_config_path", type=_make_parent_dirs_and_return_path, required=True, default=argparse.SUPPRESS)
_parsed_args = vars(_parser.parse_args())
_outputs = xgboost_train(**_parsed_args)
args:
- --training-data
- {inputPath: training_data}
- --label-column-name
- {inputValue: label_column_name}
- if:
cond: {isPresent: starting_model}
then:
- --starting-model
- {inputPath: starting_model}
- if:
cond: {isPresent: num_iterations}
then:
- --num-iterations
- {inputValue: num_iterations}
- if:
cond: {isPresent: booster_params}
then:
- --booster-params
- {inputValue: booster_params}
- if:
cond: {isPresent: objective}
then:
- --objective
- {inputValue: objective}
- if:
cond: {isPresent: booster}
then:
- --booster
- {inputValue: booster}
- if:
cond: {isPresent: learning_rate}
then:
- --learning-rate
- {inputValue: learning_rate}
- if:
cond: {isPresent: min_split_loss}
then:
- --min-split-loss
- {inputValue: min_split_loss}
- if:
cond: {isPresent: max_depth}
then:
- --max-depth
- {inputValue: max_depth}
- --model
- {outputPath: model}
- --model-config
- {outputPath: model_config}

@ -1,53 +0,0 @@
from collections import OrderedDict
from kfp import components
xgboost_train_regression_and_calculate_metrics_on_csv_op = components.load_component_from_url('https://raw.githubusercontent.com/kubeflow/pipelines/18e8974288885086b2fd5351f6333210cd237d1b/components/XGBoost/Train_regression_and_calculate_metrics/from_CSV/component.yaml')
xgboost_5_fold_cross_validation_for_regression_op = components.load_component_from_url('https://raw.githubusercontent.com/kubeflow/pipelines/271593e4193e2d3e44bdf42269fc03f0fcd2e5e8/components/XGBoost/Cross_validation_for_regression/from_CSV/component.yaml')
def xgboost_train_and_cv_regression_on_csv(
data: 'CSV',
label_column: int = 0,
objective: str = 'reg:squarederror',
num_iterations: int = 200,
):
main_training_and_metrics_task = xgboost_train_regression_and_calculate_metrics_on_csv_op(
training_data=data,
testing_data=data,
label_column=label_column,
objective=objective,
num_iterations=num_iterations,
)
cv_training_and_metrics_task = xgboost_5_fold_cross_validation_for_regression_op(
data=data,
label_column=label_column,
objective=objective,
num_iterations=num_iterations,
)
return OrderedDict([
('model', main_training_and_metrics_task.outputs['model']),
('training_mean_absolute_error', main_training_and_metrics_task.outputs['mean_absolute_error']),
('training_mean_squared_error', main_training_and_metrics_task.outputs['mean_squared_error']),
('training_root_mean_squared_error', main_training_and_metrics_task.outputs['root_mean_squared_error']),
('training_metrics', main_training_and_metrics_task.outputs['metrics']),
('cv_mean_absolute_error', cv_training_and_metrics_task.outputs['mean_absolute_error']),
('cv_mean_squared_error', cv_training_and_metrics_task.outputs['mean_squared_error']),
('cv_root_mean_squared_error', cv_training_and_metrics_task.outputs['root_mean_squared_error']),
('cv_metrics', cv_training_and_metrics_task.outputs['metrics']),
])
if __name__ == '__main__':
xgboost_train_and_cv_regression_on_csv_op = components.create_graph_component_from_pipeline_func(
xgboost_train_and_cv_regression_on_csv,
output_component_file='component.yaml',
annotations={
"author": "Alexey Volkov <alexey.volkov@ark-kun.com>",
"canonical_location": "https://raw.githubusercontent.com/Ark-kun/pipeline_components/master/components/XGBoost/Train_and_cross-validate_regression/from_CSV/component.yaml",
},
)
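
Once compiled, the OrderedDict keys returned above become the graph component's named outputs. A minimal usage sketch, assuming the generated component.yaml is loaded back from disk (the file path and downstream wiring are illustrative):

from kfp import components

train_and_cv_op = components.load_component_from_file('component.yaml')

def my_pipeline(data):
    cv_task = train_and_cv_op(data=data, label_column=0)
    # Every OrderedDict key is addressable as a task output, e.g.
    # cv_task.outputs['cv_root_mean_squared_error'] for downstream steps.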

@ -1,339 +0,0 @@
name: Xgboost train and cv regression on csv
inputs:
- {name: data, type: CSV}
- {name: label_column, type: Integer, default: '0', optional: true}
- {name: objective, type: String, default: 'reg:squarederror', optional: true}
- {name: num_iterations, type: Integer, default: '200', optional: true}
outputs:
- {name: model, type: XGBoostModel}
- {name: training_mean_absolute_error, type: Float}
- {name: training_mean_squared_error, type: Float}
- {name: training_root_mean_squared_error, type: Float}
- {name: training_metrics, type: JsonObject}
- {name: cv_mean_absolute_error, type: Float}
- {name: cv_mean_squared_error, type: Float}
- {name: cv_root_mean_squared_error, type: Float}
- {name: cv_metrics, type: JsonObject}
metadata:
annotations:
author: Alexey Volkov <alexey.volkov@ark-kun.com>
canonical_location: 'https://raw.githubusercontent.com/Ark-kun/pipeline_components/master/components/XGBoost/Train_and_cross-validate_regression/from_CSV/component.yaml'
implementation:
graph:
tasks:
Xgboost train:
componentRef: {digest: 09b80053da29f8f51575b42e5d2e8ad4b7bdcc92a02c3744e189b1f597006b38,
url: 'https://raw.githubusercontent.com/kubeflow/pipelines/567c04c51ff00a1ee525b3458425b17adbe3df61/components/XGBoost/Train/component.yaml'}
arguments:
training_data:
graphInput: {inputName: data}
label_column:
graphInput: {inputName: label_column}
num_iterations:
graphInput: {inputName: num_iterations}
objective:
graphInput: {inputName: objective}
Xgboost predict:
componentRef: {digest: ecdfaf32cff15b6abc3d0dd80365ce00577f1a19a058fbe201f515431cea1357,
url: 'https://raw.githubusercontent.com/kubeflow/pipelines/567c04c51ff00a1ee525b3458425b17adbe3df61/components/XGBoost/Predict/component.yaml'}
arguments:
data:
graphInput: {inputName: data}
model:
taskOutput: {outputName: model, taskId: Xgboost train, type: XGBoostModel}
label_column:
graphInput: {inputName: label_column}
Pandas Transform DataFrame in CSV format:
componentRef: {digest: 58dc88349157bf128021708c316ce4eb60bc1de0a5a7dd3af45fabac3276d510,
url: 'https://raw.githubusercontent.com/kubeflow/pipelines/6162d55998b176b50267d351241100bb0ee715bc/components/pandas/Transform_DataFrame/in_CSV_format/component.yaml'}
arguments:
table:
graphInput: {inputName: data}
transform_code: df = df[["tips"]]
Remove header:
componentRef: {digest: ba35ffea863855b956c3c50aefa0420ba3823949a6c059e6e3971cde960dc5a3,
url: 'https://raw.githubusercontent.com/kubeflow/pipelines/02c9638287468c849632cf9f7885b51de4c66f86/components/tables/Remove_header/component.yaml'}
arguments:
table:
taskOutput: {outputName: transformed_table, taskId: Pandas Transform DataFrame
in CSV format, type: CSV}
Calculate regression metrics from csv:
componentRef: {digest: e3ecbfeb18032820edfee4255e2fb6d15d15ed224e166519d5e528e12053a995,
url: 'https://raw.githubusercontent.com/kubeflow/pipelines/7da1ac9464b4b3e7d95919faa2f1107a9635b7e4/components/ml_metrics/Calculate_regression_metrics/from_CSV/component.yaml'}
arguments:
true_values:
taskOutput: {outputName: table, taskId: Remove header}
predicted_values:
taskOutput: {outputName: predictions, taskId: Xgboost predict, type: Text}
Split table into folds:
componentRef: {digest: 9956223bcecc7294ca1afac39b60ada4a935a571d817c3dfbf2ea4a211afe3d1,
url: 'https://raw.githubusercontent.com/kubeflow/pipelines/e9b4b29b22a5120daf95b581b0392cd461a906f0/components/dataset_manipulation/split_data_into_folds/in_CSV/component.yaml'}
arguments:
table:
graphInput: {inputName: data}
Pandas Transform DataFrame in CSV format 2:
componentRef: {digest: 58dc88349157bf128021708c316ce4eb60bc1de0a5a7dd3af45fabac3276d510,
url: 'https://raw.githubusercontent.com/kubeflow/pipelines/6162d55998b176b50267d351241100bb0ee715bc/components/pandas/Transform_DataFrame/in_CSV_format/component.yaml'}
arguments:
table:
taskOutput: {outputName: test_3, taskId: Split table into folds, type: CSV}
transform_code: df = df[["tips"]]
Remove header 2:
componentRef: {digest: ba35ffea863855b956c3c50aefa0420ba3823949a6c059e6e3971cde960dc5a3,
url: 'https://raw.githubusercontent.com/kubeflow/pipelines/02c9638287468c849632cf9f7885b51de4c66f86/components/tables/Remove_header/component.yaml'}
arguments:
table:
taskOutput: {outputName: transformed_table, taskId: Pandas Transform DataFrame
in CSV format 2, type: CSV}
Xgboost train 2:
componentRef: {digest: 09b80053da29f8f51575b42e5d2e8ad4b7bdcc92a02c3744e189b1f597006b38,
url: 'https://raw.githubusercontent.com/kubeflow/pipelines/567c04c51ff00a1ee525b3458425b17adbe3df61/components/XGBoost/Train/component.yaml'}
arguments:
training_data:
taskOutput: {outputName: train_1, taskId: Split table into folds, type: CSV}
label_column:
graphInput: {inputName: label_column}
num_iterations:
graphInput: {inputName: num_iterations}
objective:
graphInput: {inputName: objective}
Xgboost predict 2:
componentRef: {digest: ecdfaf32cff15b6abc3d0dd80365ce00577f1a19a058fbe201f515431cea1357,
url: 'https://raw.githubusercontent.com/kubeflow/pipelines/567c04c51ff00a1ee525b3458425b17adbe3df61/components/XGBoost/Predict/component.yaml'}
arguments:
data:
taskOutput: {outputName: test_1, taskId: Split table into folds, type: CSV}
model:
taskOutput: {outputName: model, taskId: Xgboost train 2, type: XGBoostModel}
label_column:
graphInput: {inputName: label_column}
Pandas Transform DataFrame in CSV format 3:
componentRef: {digest: 58dc88349157bf128021708c316ce4eb60bc1de0a5a7dd3af45fabac3276d510,
url: 'https://raw.githubusercontent.com/kubeflow/pipelines/6162d55998b176b50267d351241100bb0ee715bc/components/pandas/Transform_DataFrame/in_CSV_format/component.yaml'}
arguments:
table:
taskOutput: {outputName: test_2, taskId: Split table into folds, type: CSV}
transform_code: df = df[["tips"]]
Remove header 3:
componentRef: {digest: ba35ffea863855b956c3c50aefa0420ba3823949a6c059e6e3971cde960dc5a3,
url: 'https://raw.githubusercontent.com/kubeflow/pipelines/02c9638287468c849632cf9f7885b51de4c66f86/components/tables/Remove_header/component.yaml'}
arguments:
table:
taskOutput: {outputName: transformed_table, taskId: Pandas Transform DataFrame
in CSV format 3, type: CSV}
Xgboost train 3:
componentRef: {digest: 09b80053da29f8f51575b42e5d2e8ad4b7bdcc92a02c3744e189b1f597006b38,
url: 'https://raw.githubusercontent.com/kubeflow/pipelines/567c04c51ff00a1ee525b3458425b17adbe3df61/components/XGBoost/Train/component.yaml'}
arguments:
training_data:
taskOutput: {outputName: train_4, taskId: Split table into folds, type: CSV}
label_column:
graphInput: {inputName: label_column}
num_iterations:
graphInput: {inputName: num_iterations}
objective:
graphInput: {inputName: objective}
Pandas Transform DataFrame in CSV format 4:
componentRef: {digest: 58dc88349157bf128021708c316ce4eb60bc1de0a5a7dd3af45fabac3276d510,
url: 'https://raw.githubusercontent.com/kubeflow/pipelines/6162d55998b176b50267d351241100bb0ee715bc/components/pandas/Transform_DataFrame/in_CSV_format/component.yaml'}
arguments:
table:
taskOutput: {outputName: test_4, taskId: Split table into folds, type: CSV}
transform_code: df = df[["tips"]]
Remove header 4:
componentRef: {digest: ba35ffea863855b956c3c50aefa0420ba3823949a6c059e6e3971cde960dc5a3,
url: 'https://raw.githubusercontent.com/kubeflow/pipelines/02c9638287468c849632cf9f7885b51de4c66f86/components/tables/Remove_header/component.yaml'}
arguments:
table:
taskOutput: {outputName: transformed_table, taskId: Pandas Transform DataFrame
in CSV format 4, type: CSV}
Xgboost predict 3:
componentRef: {digest: ecdfaf32cff15b6abc3d0dd80365ce00577f1a19a058fbe201f515431cea1357,
url: 'https://raw.githubusercontent.com/kubeflow/pipelines/567c04c51ff00a1ee525b3458425b17adbe3df61/components/XGBoost/Predict/component.yaml'}
arguments:
data:
taskOutput: {outputName: test_4, taskId: Split table into folds, type: CSV}
model:
taskOutput: {outputName: model, taskId: Xgboost train 3, type: XGBoostModel}
label_column:
graphInput: {inputName: label_column}
Calculate regression metrics from csv 2:
componentRef: {digest: e3ecbfeb18032820edfee4255e2fb6d15d15ed224e166519d5e528e12053a995,
url: 'https://raw.githubusercontent.com/kubeflow/pipelines/7da1ac9464b4b3e7d95919faa2f1107a9635b7e4/components/ml_metrics/Calculate_regression_metrics/from_CSV/component.yaml'}
arguments:
true_values:
taskOutput: {outputName: table, taskId: Remove header 4}
predicted_values:
taskOutput: {outputName: predictions, taskId: Xgboost predict 3, type: Text}
Pandas Transform DataFrame in CSV format 5:
componentRef: {digest: 58dc88349157bf128021708c316ce4eb60bc1de0a5a7dd3af45fabac3276d510,
url: 'https://raw.githubusercontent.com/kubeflow/pipelines/6162d55998b176b50267d351241100bb0ee715bc/components/pandas/Transform_DataFrame/in_CSV_format/component.yaml'}
arguments:
table:
taskOutput: {outputName: test_1, taskId: Split table into folds, type: CSV}
transform_code: df = df[["tips"]]
Remove header 5:
componentRef: {digest: ba35ffea863855b956c3c50aefa0420ba3823949a6c059e6e3971cde960dc5a3,
url: 'https://raw.githubusercontent.com/kubeflow/pipelines/02c9638287468c849632cf9f7885b51de4c66f86/components/tables/Remove_header/component.yaml'}
arguments:
table:
taskOutput: {outputName: transformed_table, taskId: Pandas Transform DataFrame
in CSV format 5, type: CSV}
Calculate regression metrics from csv 3:
componentRef: {digest: e3ecbfeb18032820edfee4255e2fb6d15d15ed224e166519d5e528e12053a995,
url: 'https://raw.githubusercontent.com/kubeflow/pipelines/7da1ac9464b4b3e7d95919faa2f1107a9635b7e4/components/ml_metrics/Calculate_regression_metrics/from_CSV/component.yaml'}
arguments:
true_values:
taskOutput: {outputName: table, taskId: Remove header 5}
predicted_values:
taskOutput: {outputName: predictions, taskId: Xgboost predict 2, type: Text}
Xgboost train 4:
componentRef: {digest: 09b80053da29f8f51575b42e5d2e8ad4b7bdcc92a02c3744e189b1f597006b38,
url: 'https://raw.githubusercontent.com/kubeflow/pipelines/567c04c51ff00a1ee525b3458425b17adbe3df61/components/XGBoost/Train/component.yaml'}
arguments:
training_data:
taskOutput: {outputName: train_2, taskId: Split table into folds, type: CSV}
label_column:
graphInput: {inputName: label_column}
num_iterations:
graphInput: {inputName: num_iterations}
objective:
graphInput: {inputName: objective}
Xgboost predict 4:
componentRef: {digest: ecdfaf32cff15b6abc3d0dd80365ce00577f1a19a058fbe201f515431cea1357,
url: 'https://raw.githubusercontent.com/kubeflow/pipelines/567c04c51ff00a1ee525b3458425b17adbe3df61/components/XGBoost/Predict/component.yaml'}
arguments:
data:
taskOutput: {outputName: test_2, taskId: Split table into folds, type: CSV}
model:
taskOutput: {outputName: model, taskId: Xgboost train 4, type: XGBoostModel}
label_column:
graphInput: {inputName: label_column}
Calculate regression metrics from csv 4:
componentRef: {digest: e3ecbfeb18032820edfee4255e2fb6d15d15ed224e166519d5e528e12053a995,
url: 'https://raw.githubusercontent.com/kubeflow/pipelines/7da1ac9464b4b3e7d95919faa2f1107a9635b7e4/components/ml_metrics/Calculate_regression_metrics/from_CSV/component.yaml'}
arguments:
true_values:
taskOutput: {outputName: table, taskId: Remove header 3}
predicted_values:
taskOutput: {outputName: predictions, taskId: Xgboost predict 4, type: Text}
Xgboost train 5:
componentRef: {digest: 09b80053da29f8f51575b42e5d2e8ad4b7bdcc92a02c3744e189b1f597006b38,
url: 'https://raw.githubusercontent.com/kubeflow/pipelines/567c04c51ff00a1ee525b3458425b17adbe3df61/components/XGBoost/Train/component.yaml'}
arguments:
training_data:
taskOutput: {outputName: train_5, taskId: Split table into folds, type: CSV}
label_column:
graphInput: {inputName: label_column}
num_iterations:
graphInput: {inputName: num_iterations}
objective:
graphInput: {inputName: objective}
Xgboost predict 5:
componentRef: {digest: ecdfaf32cff15b6abc3d0dd80365ce00577f1a19a058fbe201f515431cea1357,
url: 'https://raw.githubusercontent.com/kubeflow/pipelines/567c04c51ff00a1ee525b3458425b17adbe3df61/components/XGBoost/Predict/component.yaml'}
arguments:
data:
taskOutput: {outputName: test_5, taskId: Split table into folds, type: CSV}
model:
taskOutput: {outputName: model, taskId: Xgboost train 5, type: XGBoostModel}
label_column:
graphInput: {inputName: label_column}
Xgboost train 6:
componentRef: {digest: 09b80053da29f8f51575b42e5d2e8ad4b7bdcc92a02c3744e189b1f597006b38,
url: 'https://raw.githubusercontent.com/kubeflow/pipelines/567c04c51ff00a1ee525b3458425b17adbe3df61/components/XGBoost/Train/component.yaml'}
arguments:
training_data:
taskOutput: {outputName: train_3, taskId: Split table into folds, type: CSV}
label_column:
graphInput: {inputName: label_column}
num_iterations:
graphInput: {inputName: num_iterations}
objective:
graphInput: {inputName: objective}
Xgboost predict 6:
componentRef: {digest: ecdfaf32cff15b6abc3d0dd80365ce00577f1a19a058fbe201f515431cea1357,
url: 'https://raw.githubusercontent.com/kubeflow/pipelines/567c04c51ff00a1ee525b3458425b17adbe3df61/components/XGBoost/Predict/component.yaml'}
arguments:
data:
taskOutput: {outputName: test_3, taskId: Split table into folds, type: CSV}
model:
taskOutput: {outputName: model, taskId: Xgboost train 6, type: XGBoostModel}
label_column:
graphInput: {inputName: label_column}
Calculate regression metrics from csv 5:
componentRef: {digest: e3ecbfeb18032820edfee4255e2fb6d15d15ed224e166519d5e528e12053a995,
url: 'https://raw.githubusercontent.com/kubeflow/pipelines/7da1ac9464b4b3e7d95919faa2f1107a9635b7e4/components/ml_metrics/Calculate_regression_metrics/from_CSV/component.yaml'}
arguments:
true_values:
taskOutput: {outputName: table, taskId: Remove header 2}
predicted_values:
taskOutput: {outputName: predictions, taskId: Xgboost predict 6, type: Text}
Pandas Transform DataFrame in CSV format 6:
componentRef: {digest: 58dc88349157bf128021708c316ce4eb60bc1de0a5a7dd3af45fabac3276d510,
url: 'https://raw.githubusercontent.com/kubeflow/pipelines/6162d55998b176b50267d351241100bb0ee715bc/components/pandas/Transform_DataFrame/in_CSV_format/component.yaml'}
arguments:
table:
taskOutput: {outputName: test_5, taskId: Split table into folds, type: CSV}
transform_code: df = df[["tips"]]
Remove header 6:
componentRef: {digest: ba35ffea863855b956c3c50aefa0420ba3823949a6c059e6e3971cde960dc5a3,
url: 'https://raw.githubusercontent.com/kubeflow/pipelines/02c9638287468c849632cf9f7885b51de4c66f86/components/tables/Remove_header/component.yaml'}
arguments:
table:
taskOutput: {outputName: transformed_table, taskId: Pandas Transform DataFrame
in CSV format 6, type: CSV}
Calculate regression metrics from csv 6:
componentRef: {digest: e3ecbfeb18032820edfee4255e2fb6d15d15ed224e166519d5e528e12053a995,
url: 'https://raw.githubusercontent.com/kubeflow/pipelines/7da1ac9464b4b3e7d95919faa2f1107a9635b7e4/components/ml_metrics/Calculate_regression_metrics/from_CSV/component.yaml'}
arguments:
true_values:
taskOutput: {outputName: table, taskId: Remove header 6}
predicted_values:
taskOutput: {outputName: predictions, taskId: Xgboost predict 5, type: Text}
Aggregate regression metrics from csv:
componentRef: {digest: 3e128130521eff8d43764f3dcb037316cdd6490ad2878df5adef416f7c2f3c19,
url: 'https://raw.githubusercontent.com/kubeflow/pipelines/7ea9363fe201918d419fecdc00d1275e657ff712/components/ml_metrics/Aggregate_regression_metrics/component.yaml'}
arguments:
metrics_1:
taskOutput: {outputName: metrics, taskId: Calculate regression metrics
from csv 3, type: JsonObject}
metrics_2:
taskOutput: {outputName: metrics, taskId: Calculate regression metrics
from csv 4, type: JsonObject}
metrics_3:
taskOutput: {outputName: metrics, taskId: Calculate regression metrics
from csv 5, type: JsonObject}
metrics_4:
taskOutput: {outputName: metrics, taskId: Calculate regression metrics
from csv 2, type: JsonObject}
metrics_5:
taskOutput: {outputName: metrics, taskId: Calculate regression metrics
from csv 6, type: JsonObject}
outputValues:
model:
taskOutput: {outputName: model, taskId: Xgboost train, type: XGBoostModel}
training_mean_absolute_error:
taskOutput: {outputName: mean_absolute_error, taskId: Calculate regression
metrics from csv, type: Float}
training_mean_squared_error:
taskOutput: {outputName: mean_squared_error, taskId: Calculate regression
metrics from csv, type: Float}
training_root_mean_squared_error:
taskOutput: {outputName: root_mean_squared_error, taskId: Calculate regression
metrics from csv, type: Float}
training_metrics:
taskOutput: {outputName: metrics, taskId: Calculate regression metrics from
csv, type: JsonObject}
cv_mean_absolute_error:
taskOutput: {outputName: mean_absolute_error, taskId: Aggregate regression
metrics from csv, type: Float}
cv_mean_squared_error:
taskOutput: {outputName: mean_squared_error, taskId: Aggregate regression
metrics from csv, type: Float}
cv_root_mean_squared_error:
taskOutput: {outputName: root_mean_squared_error, taskId: Aggregate regression
metrics from csv, type: Float}
cv_metrics:
taskOutput: {outputName: metrics, taskId: Aggregate regression metrics from
csv, type: JsonObject}

@ -1,60 +0,0 @@
from collections import OrderedDict
from kfp import components
xgboost_train_on_csv_op = components.load_component_from_url('https://raw.githubusercontent.com/kubeflow/pipelines/567c04c51ff00a1ee525b3458425b17adbe3df61/components/XGBoost/Train/component.yaml')
xgboost_predict_on_csv_op = components.load_component_from_url('https://raw.githubusercontent.com/kubeflow/pipelines/567c04c51ff00a1ee525b3458425b17adbe3df61/components/XGBoost/Predict/component.yaml')
pandas_transform_csv_op = components.load_component_from_url('https://raw.githubusercontent.com/kubeflow/pipelines/6162d55998b176b50267d351241100bb0ee715bc/components/pandas/Transform_DataFrame/in_CSV_format/component.yaml')
drop_header_op = components.load_component_from_url('https://raw.githubusercontent.com/kubeflow/pipelines/02c9638287468c849632cf9f7885b51de4c66f86/components/tables/Remove_header/component.yaml')
calculate_regression_metrics_from_csv_op = components.load_component_from_url('https://raw.githubusercontent.com/kubeflow/pipelines/7da1ac9464b4b3e7d95919faa2f1107a9635b7e4/components/ml_metrics/Calculate_regression_metrics/from_CSV/component.yaml')
def xgboost_train_regression_and_calculate_metrics_on_csv(
training_data: 'CSV',
testing_data: 'CSV',
label_column: int = 0,
objective: str = 'reg:squarederror',
num_iterations: int = 200,
):
model = xgboost_train_on_csv_op(
training_data=training_data,
label_column=label_column,
objective=objective,
num_iterations=num_iterations,
).outputs['model']
predictions = xgboost_predict_on_csv_op(
data=testing_data,
model=model,
label_column=label_column,
).output
true_values_table = pandas_transform_csv_op(
table=testing_data,
transform_code='df = df[["tips"]]',
).output
true_values = drop_header_op(true_values_table).output
metrics_task = calculate_regression_metrics_from_csv_op(
true_values=true_values,
predicted_values=predictions,
)
return OrderedDict([
('model', model),
('mean_absolute_error', metrics_task.outputs['mean_absolute_error']),
('mean_squared_error', metrics_task.outputs['mean_squared_error']),
('root_mean_squared_error', metrics_task.outputs['root_mean_squared_error']),
('metrics', metrics_task.outputs['metrics']),
])
if __name__ == '__main__':
xgboost_train_regression_and_calculate_metrics_on_csv_op = components.create_graph_component_from_pipeline_func(
xgboost_train_regression_and_calculate_metrics_on_csv,
output_component_file='component.yaml',
annotations={
"author": "Alexey Volkov <alexey.volkov@ark-kun.com>",
"canonical_location": "https://raw.githubusercontent.com/Ark-kun/pipeline_components/master/components/XGBoost/Train_regression_and_calculate_metrics/from_CSV/component.yaml",
},
)

@ -1,79 +0,0 @@
name: Xgboost train regression and calculate metrics on csv
inputs:
- {name: training_data, type: CSV}
- {name: testing_data, type: CSV}
- {name: label_column, type: Integer, default: '0', optional: true}
- {name: objective, type: String, default: 'reg:squarederror', optional: true}
- {name: num_iterations, type: Integer, default: '200', optional: true}
outputs:
- {name: model, type: XGBoostModel}
- {name: mean_absolute_error, type: Float}
- {name: mean_squared_error, type: Float}
- {name: root_mean_squared_error, type: Float}
- {name: metrics, type: JsonObject}
metadata:
annotations:
author: Alexey Volkov <alexey.volkov@ark-kun.com>
canonical_location: 'https://raw.githubusercontent.com/Ark-kun/pipeline_components/master/components/XGBoost/Train_regression_and_calculate_metrics/from_CSV/component.yaml'
implementation:
graph:
tasks:
Xgboost train:
componentRef: {digest: 09b80053da29f8f51575b42e5d2e8ad4b7bdcc92a02c3744e189b1f597006b38,
url: 'https://raw.githubusercontent.com/kubeflow/pipelines/567c04c51ff00a1ee525b3458425b17adbe3df61/components/XGBoost/Train/component.yaml'}
arguments:
training_data:
graphInput: {inputName: training_data}
label_column:
graphInput: {inputName: label_column}
num_iterations:
graphInput: {inputName: num_iterations}
objective:
graphInput: {inputName: objective}
Xgboost predict:
componentRef: {digest: ecdfaf32cff15b6abc3d0dd80365ce00577f1a19a058fbe201f515431cea1357,
url: 'https://raw.githubusercontent.com/kubeflow/pipelines/567c04c51ff00a1ee525b3458425b17adbe3df61/components/XGBoost/Predict/component.yaml'}
arguments:
data:
graphInput: {inputName: testing_data}
model:
taskOutput: {outputName: model, taskId: Xgboost train, type: XGBoostModel}
label_column:
graphInput: {inputName: label_column}
Pandas Transform DataFrame in CSV format:
componentRef: {digest: 58dc88349157bf128021708c316ce4eb60bc1de0a5a7dd3af45fabac3276d510,
url: 'https://raw.githubusercontent.com/kubeflow/pipelines/6162d55998b176b50267d351241100bb0ee715bc/components/pandas/Transform_DataFrame/in_CSV_format/component.yaml'}
arguments:
table:
graphInput: {inputName: testing_data}
transform_code: df = df[["tips"]]
Remove header:
componentRef: {digest: ba35ffea863855b956c3c50aefa0420ba3823949a6c059e6e3971cde960dc5a3,
url: 'https://raw.githubusercontent.com/kubeflow/pipelines/02c9638287468c849632cf9f7885b51de4c66f86/components/tables/Remove_header/component.yaml'}
arguments:
table:
taskOutput: {outputName: transformed_table, taskId: Pandas Transform DataFrame
in CSV format, type: CSV}
Calculate regression metrics from csv:
componentRef: {digest: e3ecbfeb18032820edfee4255e2fb6d15d15ed224e166519d5e528e12053a995,
url: 'https://raw.githubusercontent.com/kubeflow/pipelines/7da1ac9464b4b3e7d95919faa2f1107a9635b7e4/components/ml_metrics/Calculate_regression_metrics/from_CSV/component.yaml'}
arguments:
true_values:
taskOutput: {outputName: table, taskId: Remove header}
predicted_values:
taskOutput: {outputName: predictions, taskId: Xgboost predict, type: Text}
outputValues:
model:
taskOutput: {outputName: model, taskId: Xgboost train, type: XGBoostModel}
mean_absolute_error:
taskOutput: {outputName: mean_absolute_error, taskId: Calculate regression
metrics from csv, type: Float}
mean_squared_error:
taskOutput: {outputName: mean_squared_error, taskId: Calculate regression
metrics from csv, type: Float}
root_mean_squared_error:
taskOutput: {outputName: root_mean_squared_error, taskId: Calculate regression
metrics from csv, type: Float}
metrics:
taskOutput: {outputName: metrics, taskId: Calculate regression metrics from
csv, type: JsonObject}

@ -1,91 +0,0 @@
#!/usr/bin/env python3
# This sample demonstrates continuous training using a train-eval-check recursive loop.
# The main pipeline trains the initial model and then gradually trains the model
# some more until the model evaluation metrics are good enough.
import kfp
from kfp import components
chicago_taxi_dataset_op = components.load_component_from_url('https://raw.githubusercontent.com/kubeflow/pipelines/e3337b8bdcd63636934954e592d4b32c95b49129/components/datasets/Chicago%20Taxi/component.yaml')
xgboost_train_on_csv_op = components.load_component_from_url('https://raw.githubusercontent.com/kubeflow/pipelines/567c04c51ff00a1ee525b3458425b17adbe3df61/components/XGBoost/Train/component.yaml')
xgboost_predict_on_csv_op = components.load_component_from_url('https://raw.githubusercontent.com/kubeflow/pipelines/567c04c51ff00a1ee525b3458425b17adbe3df61/components/XGBoost/Predict/component.yaml')
pandas_transform_csv_op = components.load_component_from_url('https://raw.githubusercontent.com/kubeflow/pipelines/6162d55998b176b50267d351241100bb0ee715bc/components/pandas/Transform_DataFrame/in_CSV_format/component.yaml')
drop_header_op = kfp.components.load_component_from_url('https://raw.githubusercontent.com/kubeflow/pipelines/02c9638287468c849632cf9f7885b51de4c66f86/components/tables/Remove_header/component.yaml')
calculate_regression_metrics_from_csv_op = kfp.components.load_component_from_url('https://raw.githubusercontent.com/kubeflow/pipelines/616542ac0f789914f4eb53438da713dd3004fba4/components/ml_metrics/Calculate_regression_metrics/from_CSV/component.yaml')
# This recursive sub-pipeline trains a model, evaluates it, calculates the metrics and checks them.
# If the model error is too high, more training is performed until the model is good enough.
@kfp.dsl.graph_component
def train_until_low_error(starting_model, training_data, true_values):
# Training
model = xgboost_train_on_csv_op(
training_data=training_data,
starting_model=starting_model,
label_column=0,
objective='reg:squarederror',
num_iterations=50,
).outputs['model']
# Predicting
predictions = xgboost_predict_on_csv_op(
data=training_data,
model=model,
label_column=0,
).output
# Calculating the regression metrics
metrics_task = calculate_regression_metrics_from_csv_op(
true_values=true_values,
predicted_values=predictions,
)
# Checking the metrics
with kfp.dsl.Condition(metrics_task.outputs['mean_squared_error'] > 0.01):
# Training some more
train_until_low_error(
starting_model=model,
training_data=training_data,
true_values=true_values,
)
# The main pipeline trains the initial model and then gradually trains the model some more until the model evaluation metrics are good enough.
def train_until_good_pipeline():
# Preparing the training data
training_data = chicago_taxi_dataset_op(
where='trip_start_timestamp >= "2019-01-01" AND trip_start_timestamp < "2019-02-01"',
select='tips,trip_seconds,trip_miles,pickup_community_area,dropoff_community_area,fare,tolls,extras,trip_total',
limit=10000,
).output
# Preparing the true values
true_values_table = pandas_transform_csv_op(
table=training_data,
transform_code='df = df[["tips"]]',
).output
true_values = drop_header_op(true_values_table).output
# Initial model training
first_model = xgboost_train_on_csv_op(
training_data=training_data,
label_column=0,
objective='reg:squarederror',
num_iterations=100,
).outputs['model']
# Recursively training until the error becomes low
train_until_low_error(
starting_model=first_model,
training_data=training_data,
true_values=true_values,
)
if __name__ == '__main__':
kfp_endpoint=None
kfp.Client(host=kfp_endpoint).create_run_from_pipeline_func(train_until_good_pipeline, arguments={})

@ -1,68 +0,0 @@
import kfp
from kfp import components
chicago_taxi_dataset_op = components.load_component_from_url('https://raw.githubusercontent.com/kubeflow/pipelines/e3337b8bdcd63636934954e592d4b32c95b49129/components/datasets/Chicago%20Taxi/component.yaml')
convert_csv_to_apache_parquet_op = components.load_component_from_url('https://raw.githubusercontent.com/kubeflow/pipelines/0d7d6f41c92bdc05c2825232afe2b47e5cb6c4b3/components/_converters/ApacheParquet/from_CSV/component.yaml')
xgboost_train_on_csv_op = components.load_component_from_url('https://raw.githubusercontent.com/kubeflow/pipelines/567c04c51ff00a1ee525b3458425b17adbe3df61/components/XGBoost/Train/component.yaml')
xgboost_predict_on_csv_op = components.load_component_from_url('https://raw.githubusercontent.com/kubeflow/pipelines/567c04c51ff00a1ee525b3458425b17adbe3df61/components/XGBoost/Predict/component.yaml')
xgboost_train_on_parquet_op = components.load_component_from_url('https://raw.githubusercontent.com/kubeflow/pipelines/0ae2f30ff24beeef1c64cc7c434f1f652c065192/components/XGBoost/Train/from_ApacheParquet/component.yaml')
xgboost_predict_on_parquet_op = components.load_component_from_url('https://raw.githubusercontent.com/kubeflow/pipelines/0ae2f30ff24beeef1c64cc7c434f1f652c065192/components/XGBoost/Predict/from_ApacheParquet/component.yaml')
def xgboost_pipeline():
training_data_csv = chicago_taxi_dataset_op(
where='trip_start_timestamp >= "2019-01-01" AND trip_start_timestamp < "2019-02-01"',
select='tips,trip_seconds,trip_miles,pickup_community_area,dropoff_community_area,fare,tolls,extras,trip_total',
limit=10000,
).output
# Training and prediction on dataset in CSV format
model_trained_on_csv = xgboost_train_on_csv_op(
training_data=training_data_csv,
label_column=0,
objective='reg:squarederror',
num_iterations=200,
).outputs['model']
xgboost_predict_on_csv_op(
data=training_data_csv,
model=model_trained_on_csv,
label_column=0,
)
# Training and prediction on dataset in Apache Parquet format
training_data_parquet = convert_csv_to_apache_parquet_op(
training_data_csv
).output
model_trained_on_parquet = xgboost_train_on_parquet_op(
training_data=training_data_parquet,
label_column_name='tips',
objective='reg:squarederror',
num_iterations=200,
).outputs['model']
xgboost_predict_on_parquet_op(
data=training_data_parquet,
model=model_trained_on_parquet,
label_column_name='tips',
)
# Checking cross-format predictions
xgboost_predict_on_parquet_op(
data=training_data_parquet,
model=model_trained_on_csv,
label_column_name='tips',
)
xgboost_predict_on_csv_op(
data=training_data_csv,
model=model_trained_on_parquet,
label_column=0,
)
if __name__ == '__main__':
kfp_endpoint = None
kfp.Client(host=kfp_endpoint).create_run_from_pipeline_func(xgboost_pipeline, arguments={})
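As an alternative to submitting a run directly, the same pipeline function can be compiled into a package for later upload; a sketch assuming the v1 SDK (the output path is arbitrary):
import kfp
# Compile the pipeline function to a workflow package instead of starting a run
kfp.compiler.Compiler().compile(
    pipeline_func=xgboost_pipeline,
    package_path='xgboost_pipeline.tar.gz',  # hypothetical output location
)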

View File

@ -1,34 +0,0 @@
# cross_validation_pipeline compact
import kfp
from kfp import components
chicago_taxi_dataset_op = components.load_component_from_url('https://raw.githubusercontent.com/kubeflow/pipelines/e3337b8bdcd63636934954e592d4b32c95b49129/components/datasets/Chicago%20Taxi/component.yaml')
xgboost_train_and_cv_regression_on_csv_op = components.load_component_from_url('https://raw.githubusercontent.com/kubeflow/pipelines/1a11ce2aea5243cdcc2b4721675303f78f49ca21/components/XGBoost/Train_and_cross-validate_regression/from_CSV/component.yaml')
def cross_validation_pipeline(
label_column: int = 0,
objective: str = 'reg:squarederror',
num_iterations: int = 200,
):
data = chicago_taxi_dataset_op(
where='trip_start_timestamp >= "{}" AND trip_start_timestamp < "{}"'.format('2019-01-01', '2019-02-01'),
select='tips,trip_seconds,trip_miles,pickup_community_area,dropoff_community_area,fare,tolls,extras,trip_total',
limit=10000,
).output
xgboost_train_and_cv_regression_on_csv_op(
data=data,
label_column=label_column,
objective=objective,
num_iterations=num_iterations,
)
if __name__ == '__main__':
kfp_endpoint = None
kfp.Client(host=kfp_endpoint).create_run_from_pipeline_func(
cross_validation_pipeline,
arguments={},
)

View File

@ -1,41 +0,0 @@
import kfp
from kfp import components
component_store = components.ComponentStore(url_search_prefixes=['https://raw.githubusercontent.com/kubeflow/pipelines/af3eaf64e87313795cad1add9bfd9fa1e86af6de/components/'])
chicago_taxi_dataset_op = component_store.load_component(name='datasets/Chicago_Taxi_Trips')
convert_csv_to_apache_parquet_op = component_store.load_component(name='_converters/ApacheParquet/from_CSV')
convert_tsv_to_apache_parquet_op = component_store.load_component(name='_converters/ApacheParquet/from_TSV')
convert_apache_parquet_to_csv_op = component_store.load_component(name='_converters/ApacheParquet/to_CSV')
convert_apache_parquet_to_tsv_op = component_store.load_component(name='_converters/ApacheParquet/to_TSV')
convert_apache_parquet_to_apache_arrow_feather_op = component_store.load_component(name='_converters/ApacheParquet/to_ApacheArrowFeather')
convert_apache_arrow_feather_to_apache_parquet_op = component_store.load_component(name='_converters/ApacheParquet/from_ApacheArrowFeather')
def parquet_pipeline():
csv = chicago_taxi_dataset_op(
where='trip_start_timestamp >= "2019-01-01" AND trip_start_timestamp < "2019-02-01"',
select='tips,trip_seconds,trip_miles,pickup_community_area,dropoff_community_area,fare,tolls,extras,trip_total',
limit=10000,
).output
tsv = chicago_taxi_dataset_op(
where='trip_start_timestamp >= "2019-01-01" AND trip_start_timestamp < "2019-02-01"',
select='tips,trip_seconds,trip_miles,pickup_community_area,dropoff_community_area,fare,tolls,extras,trip_total',
limit=10000,
format='tsv',
).output
csv_parquet = convert_csv_to_apache_parquet_op(csv).output
csv_parquet_csv = convert_apache_parquet_to_csv_op(csv_parquet).output
csv_parquet_feather = convert_apache_parquet_to_apache_arrow_feather_op(csv_parquet).output
csv_parquet_feather_parquet = convert_apache_arrow_feather_to_apache_parquet_op(csv_parquet_feather).output
tsv_parquet = convert_tsv_to_apache_parquet_op(tsv).output
tsv_parquet_tsv = convert_apache_parquet_to_tsv_op(tsv_parquet).output
tsv_parquet_feather = convert_apache_parquet_to_apache_arrow_feather_op(tsv_parquet).output
tsv_parquet_feather_parquet = convert_apache_arrow_feather_to_apache_parquet_op(tsv_parquet_feather).output
if __name__ == '__main__':
kfp_endpoint = None
kfp.Client(host=kfp_endpoint).create_run_from_pipeline_func(parquet_pipeline, arguments={})

View File

@ -1,31 +0,0 @@
from kfp.components import InputPath, OutputPath, create_component_from_func
def convert_apache_arrow_feather_to_apache_parquet(
data_path: InputPath('ApacheArrowFeather'),
output_data_path: OutputPath('ApacheParquet'),
):
'''Converts Apache Arrow Feather to Apache Parquet.
[Apache Arrow Feather](https://arrow.apache.org/docs/python/feather.html)
[Apache Parquet](https://parquet.apache.org/)
Annotations:
author: Alexey Volkov <alexey.volkov@ark-kun.com>
'''
from pyarrow import feather, parquet
table = feather.read_table(data_path)
parquet.write_table(table, output_data_path)
if __name__ == '__main__':
create_component_from_func(
convert_apache_arrow_feather_to_apache_parquet,
output_component_file='component.yaml',
base_image='python:3.7',
packages_to_install=['pyarrow==0.17.1'],
annotations={
"author": "Alexey Volkov <alexey.volkov@ark-kun.com>",
"canonical_location": "https://raw.githubusercontent.com/Ark-kun/pipeline_components/master/components/_converters/ApacheParquet/from_ApacheArrowFeather/component.yaml",
},
)

View File

@ -1,78 +0,0 @@
name: Convert apache arrow feather to apache parquet
description: |-
Converts Apache Arrow Feather to Apache Parquet.
[Apache Arrow Feather](https://arrow.apache.org/docs/python/feather.html)
[Apache Parquet](https://parquet.apache.org/)
Annotations:
author: Alexey Volkov <alexey.volkov@ark-kun.com>
inputs:
- {name: data, type: ApacheArrowFeather}
outputs:
- {name: output_data, type: ApacheParquet}
metadata:
annotations:
author: Alexey Volkov <alexey.volkov@ark-kun.com>
canonical_location: 'https://raw.githubusercontent.com/Ark-kun/pipeline_components/master/components/_converters/ApacheParquet/from_ApacheArrowFeather/component.yaml'
implementation:
container:
image: python:3.7
command:
- sh
- -c
- (PIP_DISABLE_PIP_VERSION_CHECK=1 python3 -m pip install --quiet --no-warn-script-location
'pyarrow==0.17.1' || PIP_DISABLE_PIP_VERSION_CHECK=1 python3 -m pip install
--quiet --no-warn-script-location 'pyarrow==0.17.1' --user) && "$0" "$@"
- python3
- -u
- -c
- |
def _make_parent_dirs_and_return_path(file_path: str):
import os
os.makedirs(os.path.dirname(file_path), exist_ok=True)
return file_path
def convert_apache_arrow_feather_to_apache_parquet(
data_path,
output_data_path,
):
'''Converts Apache Arrow Feather to Apache Parquet.
[Apache Arrow Feather](https://arrow.apache.org/docs/python/feather.html)
[Apache Parquet](https://parquet.apache.org/)
Annotations:
author: Alexey Volkov <alexey.volkov@ark-kun.com>
'''
from pyarrow import feather, parquet
table = feather.read_table(data_path)
parquet.write_table(table, output_data_path)
import argparse
_parser = argparse.ArgumentParser(prog='Convert apache arrow feather to apache parquet', description='Converts Apache Arrow Feather to Apache Parquet.\n\n [Apache Arrow Feather](https://arrow.apache.org/docs/python/feather.html)\n [Apache Parquet](https://parquet.apache.org/)\n\n Annotations:\n author: Alexey Volkov <alexey.volkov@ark-kun.com>')
_parser.add_argument("--data", dest="data_path", type=str, required=True, default=argparse.SUPPRESS)
_parser.add_argument("--output-data", dest="output_data_path", type=_make_parent_dirs_and_return_path, required=True, default=argparse.SUPPRESS)
_parsed_args = vars(_parser.parse_args())
_output_files = _parsed_args.pop("_output_paths", [])
_outputs = convert_apache_arrow_feather_to_apache_parquet(**_parsed_args)
_output_serializers = [
]
import os
for idx, output_file in enumerate(_output_files):
try:
os.makedirs(os.path.dirname(output_file))
except OSError:
pass
with open(output_file, 'w') as f:
f.write(_output_serializers[idx](_outputs[idx]))
args:
- --data
- {inputPath: data}
- --output-data
- {outputPath: output_data}

View File

@ -1,30 +0,0 @@
from kfp.components import InputPath, OutputPath, create_component_from_func
def convert_csv_to_apache_parquet(
data_path: InputPath('CSV'),
output_data_path: OutputPath('ApacheParquet'),
):
'''Converts CSV table to Apache Parquet.
[Apache Parquet](https://parquet.apache.org/)
Annotations:
author: Alexey Volkov <alexey.volkov@ark-kun.com>
'''
from pyarrow import csv, parquet
table = csv.read_csv(data_path)
parquet.write_table(table, output_data_path)
if __name__ == '__main__':
create_component_from_func(
convert_csv_to_apache_parquet,
output_component_file='component.yaml',
base_image='python:3.7',
packages_to_install=['pyarrow==0.17.1'],
annotations={
"author": "Alexey Volkov <alexey.volkov@ark-kun.com>",
"canonical_location": "https://raw.githubusercontent.com/Ark-kun/pipeline_components/master/components/_converters/ApacheParquet/from_CSV/component.yaml",
},
)

View File

@ -1,76 +0,0 @@
name: Convert csv to apache parquet
description: |-
Converts CSV table to Apache Parquet.
[Apache Parquet](https://parquet.apache.org/)
Annotations:
author: Alexey Volkov <alexey.volkov@ark-kun.com>
inputs:
- {name: data, type: CSV}
outputs:
- {name: output_data, type: ApacheParquet}
metadata:
annotations:
author: Alexey Volkov <alexey.volkov@ark-kun.com>
canonical_location: 'https://raw.githubusercontent.com/Ark-kun/pipeline_components/master/components/_converters/ApacheParquet/from_CSV/component.yaml'
implementation:
container:
image: python:3.7
command:
- sh
- -c
- (PIP_DISABLE_PIP_VERSION_CHECK=1 python3 -m pip install --quiet --no-warn-script-location
'pyarrow==0.17.1' || PIP_DISABLE_PIP_VERSION_CHECK=1 python3 -m pip install
--quiet --no-warn-script-location 'pyarrow==0.17.1' --user) && "$0" "$@"
- python3
- -u
- -c
- |
def _make_parent_dirs_and_return_path(file_path: str):
import os
os.makedirs(os.path.dirname(file_path), exist_ok=True)
return file_path
def convert_csv_to_apache_parquet(
data_path,
output_data_path,
):
'''Converts CSV table to Apache Parquet.
[Apache Parquet](https://parquet.apache.org/)
Annotations:
author: Alexey Volkov <alexey.volkov@ark-kun.com>
'''
from pyarrow import csv, parquet
table = csv.read_csv(data_path)
parquet.write_table(table, output_data_path)
import argparse
_parser = argparse.ArgumentParser(prog='Convert csv to apache parquet', description='Converts CSV table to Apache Parquet.\n\n [Apache Parquet](https://parquet.apache.org/)\n\n Annotations:\n author: Alexey Volkov <alexey.volkov@ark-kun.com>')
_parser.add_argument("--data", dest="data_path", type=str, required=True, default=argparse.SUPPRESS)
_parser.add_argument("--output-data", dest="output_data_path", type=_make_parent_dirs_and_return_path, required=True, default=argparse.SUPPRESS)
_parsed_args = vars(_parser.parse_args())
_output_files = _parsed_args.pop("_output_paths", [])
_outputs = convert_csv_to_apache_parquet(**_parsed_args)
_output_serializers = [
]
import os
for idx, output_file in enumerate(_output_files):
try:
os.makedirs(os.path.dirname(output_file))
except OSError:
pass
with open(output_file, 'w') as f:
f.write(_output_serializers[idx](_outputs[idx]))
args:
- --data
- {inputPath: data}
- --output-data
- {outputPath: output_data}

View File

@ -1,30 +0,0 @@
from kfp.components import InputPath, OutputPath, create_component_from_func
def convert_tsv_to_apache_parquet(
data_path: InputPath('TSV'),
output_data_path: OutputPath('ApacheParquet'),
):
'''Converts TSV table to Apache Parquet.
[Apache Parquet](https://parquet.apache.org/)
Annotations:
author: Alexey Volkov <alexey.volkov@ark-kun.com>
'''
from pyarrow import csv, parquet
table = csv.read_csv(data_path, parse_options=csv.ParseOptions(delimiter='\t'))
parquet.write_table(table, output_data_path)
if __name__ == '__main__':
create_component_from_func(
convert_tsv_to_apache_parquet,
output_component_file='component.yaml',
base_image='python:3.7',
packages_to_install=['pyarrow==0.17.1'],
annotations={
"author": "Alexey Volkov <alexey.volkov@ark-kun.com>",
"canonical_location": "https://raw.githubusercontent.com/Ark-kun/pipeline_components/master/components/_converters/ApacheParquet/from_TSV/component.yaml",
},
)

View File

@ -1,76 +0,0 @@
name: Convert tsv to apache parquet
description: |-
Converts TSV table to Apache Parquet.
[Apache Parquet](https://parquet.apache.org/)
Annotations:
author: Alexey Volkov <alexey.volkov@ark-kun.com>
inputs:
- {name: data, type: TSV}
outputs:
- {name: output_data, type: ApacheParquet}
metadata:
annotations:
author: Alexey Volkov <alexey.volkov@ark-kun.com>
canonical_location: 'https://raw.githubusercontent.com/Ark-kun/pipeline_components/master/components/_converters/ApacheParquet/from_TSV/component.yaml'
implementation:
container:
image: python:3.7
command:
- sh
- -c
- (PIP_DISABLE_PIP_VERSION_CHECK=1 python3 -m pip install --quiet --no-warn-script-location
'pyarrow==0.17.1' || PIP_DISABLE_PIP_VERSION_CHECK=1 python3 -m pip install
--quiet --no-warn-script-location 'pyarrow==0.17.1' --user) && "$0" "$@"
- python3
- -u
- -c
- |
def _make_parent_dirs_and_return_path(file_path: str):
import os
os.makedirs(os.path.dirname(file_path), exist_ok=True)
return file_path
def convert_tsv_to_apache_parquet(
data_path,
output_data_path,
):
'''Converts TSV table to Apache Parquet.
[Apache Parquet](https://parquet.apache.org/)
Annotations:
author: Alexey Volkov <alexey.volkov@ark-kun.com>
'''
from pyarrow import csv, parquet
table = csv.read_csv(data_path, parse_options=csv.ParseOptions(delimiter='\t'))
parquet.write_table(table, output_data_path)
import argparse
_parser = argparse.ArgumentParser(prog='Convert tsv to apache parquet', description='Converts TSV table to Apache Parquet.\n\n [Apache Parquet](https://parquet.apache.org/)\n\n Annotations:\n author: Alexey Volkov <alexey.volkov@ark-kun.com>')
_parser.add_argument("--data", dest="data_path", type=str, required=True, default=argparse.SUPPRESS)
_parser.add_argument("--output-data", dest="output_data_path", type=_make_parent_dirs_and_return_path, required=True, default=argparse.SUPPRESS)
_parsed_args = vars(_parser.parse_args())
_output_files = _parsed_args.pop("_output_paths", [])
_outputs = convert_tsv_to_apache_parquet(**_parsed_args)
_output_serializers = [
]
import os
for idx, output_file in enumerate(_output_files):
try:
os.makedirs(os.path.dirname(output_file))
except OSError:
pass
with open(output_file, 'w') as f:
f.write(_output_serializers[idx](_outputs[idx]))
args:
- --data
- {inputPath: data}
- --output-data
- {outputPath: output_data}

View File

@ -1,31 +0,0 @@
from kfp.components import InputPath, OutputPath, create_component_from_func
def convert_apache_parquet_to_apache_arrow_feather(
data_path: InputPath('ApacheParquet'),
output_data_path: OutputPath('ApacheArrowFeather'),
):
'''Converts Apache Parquet to Apache Arrow Feather.
[Apache Arrow Feather](https://arrow.apache.org/docs/python/feather.html)
[Apache Parquet](https://parquet.apache.org/)
Annotations:
author: Alexey Volkov <alexey.volkov@ark-kun.com>
'''
from pyarrow import feather, parquet
data_frame = parquet.read_pandas(data_path).to_pandas()
feather.write_feather(data_frame, output_data_path)
if __name__ == '__main__':
convert_apache_parquet_to_apache_arrow_feather_op = create_component_from_func(
convert_apache_parquet_to_apache_arrow_feather,
output_component_file='component.yaml',
base_image='python:3.7',
packages_to_install=['pyarrow==0.17.1', 'pandas==1.0.3'],
annotations={
"author": "Alexey Volkov <alexey.volkov@ark-kun.com>",
"canonical_location": "https://raw.githubusercontent.com/Ark-kun/pipeline_components/master/components/_converters/ApacheParquet/to_ApacheArrowFeather/component.yaml",
},
)

View File

@ -1,79 +0,0 @@
name: Convert apache parquet to apache arrow feather
description: |-
Converts Apache Parquet to Apache Arrow Feather.
[Apache Arrow Feather](https://arrow.apache.org/docs/python/feather.html)
[Apache Parquet](https://parquet.apache.org/)
Annotations:
author: Alexey Volkov <alexey.volkov@ark-kun.com>
inputs:
- {name: data, type: ApacheParquet}
outputs:
- {name: output_data, type: ApacheArrowFeather}
metadata:
annotations:
author: Alexey Volkov <alexey.volkov@ark-kun.com>
canonical_location: 'https://raw.githubusercontent.com/Ark-kun/pipeline_components/master/components/_converters/ApacheParquet/to_ApacheArrowFeather/component.yaml'
implementation:
container:
image: python:3.7
command:
- sh
- -c
- (PIP_DISABLE_PIP_VERSION_CHECK=1 python3 -m pip install --quiet --no-warn-script-location
'pyarrow==0.17.1' 'pandas==1.0.3' || PIP_DISABLE_PIP_VERSION_CHECK=1 python3
-m pip install --quiet --no-warn-script-location 'pyarrow==0.17.1' 'pandas==1.0.3'
--user) && "$0" "$@"
- python3
- -u
- -c
- |
def _make_parent_dirs_and_return_path(file_path: str):
import os
os.makedirs(os.path.dirname(file_path), exist_ok=True)
return file_path
def convert_apache_parquet_to_apache_arrow_feather(
data_path,
output_data_path,
):
'''Converts Apache Parquet to Apache Arrow Feather.
[Apache Arrow Feather](https://arrow.apache.org/docs/python/feather.html)
[Apache Parquet](https://parquet.apache.org/)
Annotations:
author: Alexey Volkov <alexey.volkov@ark-kun.com>
'''
from pyarrow import feather, parquet
data_frame = parquet.read_pandas(data_path).to_pandas()
feather.write_feather(data_frame, output_data_path)
import argparse
_parser = argparse.ArgumentParser(prog='Convert apache parquet to apache arrow feather', description='Converts Apache Parquet to Apache Arrow Feather.\n\n [Apache Arrow Feather](https://arrow.apache.org/docs/python/feather.html)\n [Apache Parquet](https://parquet.apache.org/)\n\n Annotations:\n author: Alexey Volkov <alexey.volkov@ark-kun.com>')
_parser.add_argument("--data", dest="data_path", type=str, required=True, default=argparse.SUPPRESS)
_parser.add_argument("--output-data", dest="output_data_path", type=_make_parent_dirs_and_return_path, required=True, default=argparse.SUPPRESS)
_parsed_args = vars(_parser.parse_args())
_output_files = _parsed_args.pop("_output_paths", [])
_outputs = convert_apache_parquet_to_apache_arrow_feather(**_parsed_args)
_output_serializers = [
]
import os
for idx, output_file in enumerate(_output_files):
try:
os.makedirs(os.path.dirname(output_file))
except OSError:
pass
with open(output_file, 'w') as f:
f.write(_output_serializers[idx](_outputs[idx]))
args:
- --data
- {inputPath: data}
- --output-data
- {outputPath: output_data}

View File

@ -1,33 +0,0 @@
from kfp.components import InputPath, OutputPath, create_component_from_func
def convert_apache_parquet_to_csv(
data_path: InputPath('ApacheParquet'),
output_data_path: OutputPath('CSV'),
):
'''Converts Apache Parquet to CSV.
[Apache Parquet](https://parquet.apache.org/)
Annotations:
author: Alexey Volkov <alexey.volkov@ark-kun.com>
'''
from pyarrow import parquet
data_frame = parquet.read_pandas(data_path).to_pandas()
data_frame.to_csv(
output_data_path,
index=False,
)
if __name__ == '__main__':
convert_apache_parquet_to_csv_op = create_component_from_func(
convert_apache_parquet_to_csv,
output_component_file='component.yaml',
base_image='python:3.7',
packages_to_install=['pyarrow==0.17.1', 'pandas==1.0.3'],
annotations={
"author": "Alexey Volkov <alexey.volkov@ark-kun.com>",
"canonical_location": "https://raw.githubusercontent.com/Ark-kun/pipeline_components/master/components/_converters/ApacheParquet/to_CSV/component.yaml",
},
)

View File

@ -1,66 +0,0 @@
name: Convert apache parquet to csv
description: |-
Converts Apache Parquet to CSV.
[Apache Parquet](https://parquet.apache.org/)
Annotations:
author: Alexey Volkov <alexey.volkov@ark-kun.com>
inputs:
- {name: data, type: ApacheParquet}
outputs:
- {name: output_data, type: CSV}
metadata:
annotations:
author: Alexey Volkov <alexey.volkov@ark-kun.com>
canonical_location: 'https://raw.githubusercontent.com/Ark-kun/pipeline_components/master/components/_converters/ApacheParquet/to_CSV/component.yaml'
implementation:
container:
image: python:3.7
command:
- sh
- -c
- (PIP_DISABLE_PIP_VERSION_CHECK=1 python3 -m pip install --quiet --no-warn-script-location
'pyarrow==0.17.1' 'pandas==1.0.3' || PIP_DISABLE_PIP_VERSION_CHECK=1 python3
-m pip install --quiet --no-warn-script-location 'pyarrow==0.17.1' 'pandas==1.0.3'
--user) && "$0" "$@"
- python3
- -u
- -c
- |
def _make_parent_dirs_and_return_path(file_path: str):
import os
os.makedirs(os.path.dirname(file_path), exist_ok=True)
return file_path
def convert_apache_parquet_to_csv(
data_path,
output_data_path,
):
'''Converts Apache Parquet to CSV.
[Apache Parquet](https://parquet.apache.org/)
Annotations:
author: Alexey Volkov <alexey.volkov@ark-kun.com>
'''
from pyarrow import parquet
data_frame = parquet.read_pandas(data_path).to_pandas()
data_frame.to_csv(
output_data_path,
index=False,
)
import argparse
_parser = argparse.ArgumentParser(prog='Convert apache parquet to csv', description='Converts Apache Parquet to CSV.\n\n [Apache Parquet](https://parquet.apache.org/)\n\n Annotations:\n author: Alexey Volkov <alexey.volkov@ark-kun.com>')
_parser.add_argument("--data", dest="data_path", type=str, required=True, default=argparse.SUPPRESS)
_parser.add_argument("--output-data", dest="output_data_path", type=_make_parent_dirs_and_return_path, required=True, default=argparse.SUPPRESS)
_parsed_args = vars(_parser.parse_args())
_outputs = convert_apache_parquet_to_csv(**_parsed_args)
args:
- --data
- {inputPath: data}
- --output-data
- {outputPath: output_data}

View File

@ -1,34 +0,0 @@
from kfp.components import InputPath, OutputPath, create_component_from_func
def convert_apache_parquet_to_tsv(
data_path: InputPath('ApacheParquet'),
output_data_path: OutputPath('TSV'),
):
'''Converts Apache Parquet to TSV.
[Apache Parquet](https://parquet.apache.org/)
Annotations:
author: Alexey Volkov <alexey.volkov@ark-kun.com>
'''
from pyarrow import parquet
data_frame = parquet.read_pandas(data_path).to_pandas()
data_frame.to_csv(
output_data_path,
index=False,
sep='\t',
)
if __name__ == '__main__':
convert_apache_parquet_to_tsv_op = create_component_from_func(
convert_apache_parquet_to_tsv,
output_component_file='component.yaml',
base_image='python:3.7',
packages_to_install=['pyarrow==0.17.1', 'pandas==1.0.3'],
annotations={
"author": "Alexey Volkov <alexey.volkov@ark-kun.com>",
"canonical_location": "https://raw.githubusercontent.com/Ark-kun/pipeline_components/master/components/_converters/ApacheParquet/to_TSV/component.yaml",
},
)

View File

@ -1,67 +0,0 @@
name: Convert apache parquet to tsv
description: |-
Converts Apache Parquet to TSV.
[Apache Parquet](https://parquet.apache.org/)
Annotations:
author: Alexey Volkov <alexey.volkov@ark-kun.com>
inputs:
- {name: data, type: ApacheParquet}
outputs:
- {name: output_data, type: TSV}
metadata:
annotations:
author: Alexey Volkov <alexey.volkov@ark-kun.com>
canonical_location: 'https://raw.githubusercontent.com/Ark-kun/pipeline_components/master/components/_converters/ApacheParquet/to_TSV/component.yaml'
implementation:
container:
image: python:3.7
command:
- sh
- -c
- (PIP_DISABLE_PIP_VERSION_CHECK=1 python3 -m pip install --quiet --no-warn-script-location
'pyarrow==0.17.1' 'pandas==1.0.3' || PIP_DISABLE_PIP_VERSION_CHECK=1 python3
-m pip install --quiet --no-warn-script-location 'pyarrow==0.17.1' 'pandas==1.0.3'
--user) && "$0" "$@"
- python3
- -u
- -c
- |
def _make_parent_dirs_and_return_path(file_path: str):
import os
os.makedirs(os.path.dirname(file_path), exist_ok=True)
return file_path
def convert_apache_parquet_to_tsv(
data_path,
output_data_path,
):
'''Converts Apache Parquet to TSV.
[Apache Parquet](https://parquet.apache.org/)
Annotations:
author: Alexey Volkov <alexey.volkov@ark-kun.com>
'''
from pyarrow import parquet
data_frame = parquet.read_pandas(data_path).to_pandas()
data_frame.to_csv(
output_data_path,
index=False,
sep='\t',
)
import argparse
_parser = argparse.ArgumentParser(prog='Convert apache parquet to tsv', description='Converts Apache Parquet to TSV.\n\n [Apache Parquet](https://parquet.apache.org/)\n\n Annotations:\n author: Alexey Volkov <alexey.volkov@ark-kun.com>')
_parser.add_argument("--data", dest="data_path", type=str, required=True, default=argparse.SUPPRESS)
_parser.add_argument("--output-data", dest="output_data_path", type=_make_parent_dirs_and_return_path, required=True, default=argparse.SUPPRESS)
_parsed_args = vars(_parser.parse_args())
_outputs = convert_apache_parquet_to_tsv(**_parsed_args)
args:
- --data
- {inputPath: data}
- --output-data
- {outputPath: output_data}

View File

@ -1,33 +0,0 @@
from kfp.components import create_component_from_func, InputPath, OutputPath
def keras_convert_hdf5_model_to_tf_saved_model(
model_path: InputPath('KerasModelHdf5'),
converted_model_path: OutputPath('TensorflowSavedModel'),
):
'''Converts Keras HDF5 model to Tensorflow SavedModel format.
Args:
model_path: Keras model in HDF5 format.
converted_model_path: Keras model in Tensorflow SavedModel format.
Annotations:
author: Alexey Volkov <alexey.volkov@ark-kun.com>
'''
from pathlib import Path
from tensorflow import keras
model = keras.models.load_model(filepath=model_path)
keras.models.save_model(model=model, filepath=converted_model_path, save_format='tf')
if __name__ == '__main__':
keras_convert_hdf5_model_to_tf_saved_model_op = create_component_from_func(
keras_convert_hdf5_model_to_tf_saved_model,
base_image='tensorflow/tensorflow:2.3.0',
packages_to_install=['h5py==2.10.0'],
output_component_file='component.yaml',
annotations={
"author": "Alexey Volkov <alexey.volkov@ark-kun.com>",
"canonical_location": "https://raw.githubusercontent.com/Ark-kun/pipeline_components/master/components/_converters/KerasModelHdf5/to_TensorflowSavedModel/component.yaml",
},
)

View File

@ -1,59 +0,0 @@
name: Keras convert hdf5 model to tf saved model
description: Converts Keras HDF5 model to Tensorflow SavedModel format.
inputs:
- {name: model, type: KerasModelHdf5, description: Keras model in HDF5 format.}
outputs:
- {name: converted_model, type: TensorflowSavedModel, description: Keras model in Tensorflow SavedModel format.}
metadata:
annotations:
author: Alexey Volkov <alexey.volkov@ark-kun.com>
canonical_location: 'https://raw.githubusercontent.com/Ark-kun/pipeline_components/master/components/_converters/KerasModelHdf5/to_TensorflowSavedModel/component.yaml'
implementation:
container:
image: tensorflow/tensorflow:2.3.0
command:
- sh
- -c
- (PIP_DISABLE_PIP_VERSION_CHECK=1 python3 -m pip install --quiet --no-warn-script-location
'h5py==2.10.0' || PIP_DISABLE_PIP_VERSION_CHECK=1 python3 -m pip install --quiet
--no-warn-script-location 'h5py==2.10.0' --user) && "$0" "$@"
- python3
- -u
- -c
- |
def _make_parent_dirs_and_return_path(file_path: str):
import os
os.makedirs(os.path.dirname(file_path), exist_ok=True)
return file_path
def keras_convert_hdf5_model_to_tf_saved_model(
model_path,
converted_model_path,
):
'''Converts Keras HDF5 model to Tensorflow SavedModel format.
Args:
model_path: Keras model in HDF5 format.
converted_model_path: Keras model in Tensorflow SavedModel format.
Annotations:
author: Alexey Volkov <alexey.volkov@ark-kun.com>
'''
from pathlib import Path
from tensorflow import keras
model = keras.models.load_model(filepath=model_path)
keras.models.save_model(model=model, filepath=converted_model_path, save_format='tf')
import argparse
_parser = argparse.ArgumentParser(prog='Keras convert hdf5 model to tf saved model', description='Converts Keras HDF5 model to Tensorflow SavedModel format.')
_parser.add_argument("--model", dest="model_path", type=str, required=True, default=argparse.SUPPRESS)
_parser.add_argument("--converted-model", dest="converted_model_path", type=_make_parent_dirs_and_return_path, required=True, default=argparse.SUPPRESS)
_parsed_args = vars(_parser.parse_args())
_outputs = keras_convert_hdf5_model_to_tf_saved_model(**_parsed_args)
args:
- --model
- {inputPath: model}
- --converted-model
- {outputPath: converted_model}

View File

@ -1,25 +0,0 @@
name: To ONNX from Keras HDF5 model
inputs:
- {name: Model, type: KerasModelHdf5}
outputs:
- {name: Model, type: OnnxModel}
metadata:
annotations:
author: Alexey Volkov <alexey.volkov@ark-kun.com>
canonical_location: 'https://raw.githubusercontent.com/Ark-kun/pipeline_components/master/components/_converters/OnnxModel/from_KerasModelHdf5/component.yaml'
implementation:
container:
image: tensorflow/tensorflow:2.3.0
command:
- sh
- -exc
- python3 -m pip install tf2onnx==1.6.3 && "$0" "$@"
- python3
- -m
- tf2onnx.convert
- --keras
- {inputPath: Model}
- --output
- {outputPath: Model}
- --fold_const
- --verbose

View File

@ -1,25 +0,0 @@
name: To ONNX from Tensorflow SavedModel
inputs:
- {name: Model, type: TensorflowSavedModel}
outputs:
- {name: Model, type: OnnxModel}
metadata:
annotations:
author: Alexey Volkov <alexey.volkov@ark-kun.com>
canonical_location: 'https://raw.githubusercontent.com/Ark-kun/pipeline_components/master/components/_converters/OnnxModel/from_TensorflowSavedModel/component.yaml'
implementation:
container:
image: tensorflow/tensorflow:2.3.0
command:
- sh
- -exc
- python3 -m pip install tf2onnx==1.6.3 && "$0" "$@"
- python3
- -m
- tf2onnx.convert
- --saved-model
- {inputPath: Model}
- --output
- {outputPath: Model}
- --fold_const
- --verbose

View File

@ -1,26 +0,0 @@
from kfp.components import create_component_from_func, InputPath, OutputPath
def convert_to_tensorflow_saved_model_from_onnx_model(
model_path: InputPath('OnnxModel'),
converted_model_path: OutputPath('TensorflowSavedModel'),
):
import onnx
import onnx_tf
onnx_model = onnx.load(model_path)
tf_rep = onnx_tf.backend.prepare(onnx_model)
tf_rep.export_graph(converted_model_path)
if __name__ == '__main__':
convert_to_tensorflow_saved_model_from_onnx_model_op = create_component_from_func(
convert_to_tensorflow_saved_model_from_onnx_model,
output_component_file='component.yaml',
base_image='tensorflow/tensorflow:2.4.1',
packages_to_install=['onnx-tf==1.7.0', 'onnx==1.8.0'], # onnx-tf==1.7.0 is not compatible with onnx==1.8.1
annotations={
"author": "Alexey Volkov <alexey.volkov@ark-kun.com>",
"canonical_location": "https://raw.githubusercontent.com/Ark-kun/pipeline_components/master/components/_converters/OnnxModel/to_TensorflowSavedModel/component.yaml",
},
)

View File

@ -1,54 +0,0 @@
name: Convert to tensorflow saved model from onnx model
metadata:
annotations:
author: Alexey Volkov <alexey.volkov@ark-kun.com>
canonical_location: 'https://raw.githubusercontent.com/Ark-kun/pipeline_components/master/components/_converters/OnnxModel/to_TensorflowSavedModel/component.yaml'
inputs:
- {name: model, type: OnnxModel}
outputs:
- {name: converted_model, type: TensorflowSavedModel}
implementation:
container:
image: tensorflow/tensorflow:2.4.1
command:
- sh
- -c
- (PIP_DISABLE_PIP_VERSION_CHECK=1 python3 -m pip install --quiet --no-warn-script-location
'onnx-tf==1.7.0' 'onnx==1.8.0' || PIP_DISABLE_PIP_VERSION_CHECK=1 python3 -m
pip install --quiet --no-warn-script-location 'onnx-tf==1.7.0' 'onnx==1.8.0'
--user) && "$0" "$@"
- sh
- -ec
- |
program_path=$(mktemp)
printf "%s" "$0" > "$program_path"
python3 -u "$program_path" "$@"
- |
def _make_parent_dirs_and_return_path(file_path: str):
import os
os.makedirs(os.path.dirname(file_path), exist_ok=True)
return file_path
def convert_to_tensorflow_saved_model_from_onnx_model(
model_path,
converted_model_path,
):
import onnx
import onnx_tf
onnx_model = onnx.load(model_path)
tf_rep = onnx_tf.backend.prepare(onnx_model)
tf_rep.export_graph(converted_model_path)
import argparse
_parser = argparse.ArgumentParser(prog='Convert to tensorflow saved model from onnx model', description='')
_parser.add_argument("--model", dest="model_path", type=str, required=True, default=argparse.SUPPRESS)
_parser.add_argument("--converted-model", dest="converted_model_path", type=_make_parent_dirs_and_return_path, required=True, default=argparse.SUPPRESS)
_parsed_args = vars(_parser.parse_args())
_outputs = convert_to_tensorflow_saved_model_from_onnx_model(**_parsed_args)
args:
- --model
- {inputPath: model}
- --converted-model
- {outputPath: converted_model}

View File

@ -1,3 +0,0 @@
# Deprecation Warning
The components in this directory have been moved to [components/contrib/_converters](https://github.com/kubeflow/pipelines/tree/master/components/contrib/_converters). This directory will be removed by the end of 2021.

View File

@ -1,25 +0,0 @@
name: Convert Keras HDF5 model to Tensorflow JS GraphModel
inputs:
- {name: Model, type: KerasModelHdf5}
outputs:
- {name: Model, type: TensorflowJSGraphModel}
metadata:
annotations:
author: Alexey Volkov <alexey.volkov@ark-kun.com>
canonical_location: 'https://raw.githubusercontent.com/Ark-kun/pipeline_components/master/components/_converters/TensorflowJSGraphModel/from_KerasModelHdf5/component.yaml'
implementation:
container:
image: tensorflow/tensorflow:2.3.0
command:
- sh
- -exc
- |
# Manually installing prerequisites so that tensorflowjs does not re-install tensorflow-cpu on top of tensorflow. See https://github.com/tensorflow/tfjs/issues/3953
python3 -m pip install --quiet 'h5py>=2.8.0' 'numpy>=1.16.4,<1.19.0' 'six>=1.12.0' 'tensorflow-hub==0.7.0' 'PyInquirer==1.0.3'
python3 -m pip install --quiet tensorflowjs==2.4.0 --no-dependencies
"$0" "$*"
- tensorflowjs_converter
- --input_format=keras
- --output_format=tfjs_graph_model
- inputPath: Model
- outputPath: Model

View File

@ -1,25 +0,0 @@
name: Convert Tensorflow SavedModel to Tensorflow JS GraphModel
inputs:
- {name: Model, type: TensorflowSavedModel}
outputs:
- {name: Model, type: TensorflowJSGraphModel}
metadata:
annotations:
author: Alexey Volkov <alexey.volkov@ark-kun.com>
canonical_location: 'https://raw.githubusercontent.com/Ark-kun/pipeline_components/master/components/_converters/TensorflowJSGraphModel/from_TensorflowSavedModel/component.yaml'
implementation:
container:
image: tensorflow/tensorflow:2.3.0
command:
- sh
- -exc
- |
# Manually installing prerequisites so that tensorflowjs does not re-install tensorflow-cpu on top of tensorflow. See https://github.com/tensorflow/tfjs/issues/3953
python3 -m pip install --quiet 'h5py>=2.8.0' 'numpy>=1.16.4,<1.19.0' 'six>=1.12.0' 'tensorflow-hub==0.7.0' 'PyInquirer==1.0.3'
python3 -m pip install --quiet tensorflowjs==2.4.0 --no-dependencies
"$0" "$*"
- tensorflowjs_converter
- --input_format=tf_saved_model
- --output_format=tfjs_graph_model
- inputPath: Model
- outputPath: Model

View File

@ -1,25 +0,0 @@
name: Convert Keras HDF5 model to Tensorflow JS LayersModel
inputs:
- {name: Model, type: KerasModelHdf5}
outputs:
- {name: Model, type: TensorflowJSLayersModel}
metadata:
annotations:
author: Alexey Volkov <alexey.volkov@ark-kun.com>
canonical_location: 'https://raw.githubusercontent.com/Ark-kun/pipeline_components/master/components/_converters/TensorflowJSLayersModel/from_KerasModelHdf5/component.yaml'
implementation:
container:
image: tensorflow/tensorflow:2.3.0
command:
- sh
- -exc
- |
# Manually installing prerequisites so that tensorflowjs does not re-install tensorflow-cpu on top of tensorflow. See https://github.com/tensorflow/tfjs/issues/3953
python3 -m pip install --quiet 'h5py>=2.8.0' 'numpy>=1.16.4,<1.19.0' 'six>=1.12.0' 'tensorflow-hub==0.7.0' 'PyInquirer==1.0.3'
python3 -m pip install --quiet tensorflowjs==2.4.0 --no-dependencies
"$0" "$*"
- tensorflowjs_converter
- --input_format=keras
- --output_format=tfjs_layers_model
- inputPath: Model
- outputPath: Model

View File

@ -1,25 +0,0 @@
name: Convert Keras SavedModel to Tensorflow JS LayersModel
inputs:
- {name: Model, type: TensorflowSavedModel}
outputs:
- {name: Model, type: TensorflowJSLayersModel}
metadata:
annotations:
author: Alexey Volkov <alexey.volkov@ark-kun.com>
canonical_location: 'https://raw.githubusercontent.com/Ark-kun/pipeline_components/master/components/_converters/TensorflowJSLayersModel/from_TensorflowSavedModel/component.yaml'
implementation:
container:
image: tensorflow/tensorflow:2.3.0
command:
- sh
- -exc
- |
# Manually installing prerequisites so that tensorflowjs does not re-install tensorflow-cpu on top of tensorflow. See https://github.com/tensorflow/tfjs/issues/3953
python3 -m pip install --quiet 'h5py>=2.8.0' 'numpy>=1.16.4,<1.19.0' 'six>=1.12.0' 'tensorflow-hub==0.7.0' 'PyInquirer==1.0.3'
python3 -m pip install --quiet tensorflowjs==2.4.0 --no-dependencies
"$0" "$*"
- tensorflowjs_converter
- --input_format=keras_saved_model
- --output_format=tfjs_layers_model
- inputPath: Model
- outputPath: Model

View File

@ -1,23 +0,0 @@
name: Convert Keras HDF5 model to Tensorflow Lite model
inputs:
- {name: Model, type: KerasModelHdf5}
outputs:
- {name: Model, type: TensorflowLiteModel}
metadata:
annotations:
author: Alexey Volkov <alexey.volkov@ark-kun.com>
canonical_location: 'https://raw.githubusercontent.com/Ark-kun/pipeline_components/master/components/_converters/TensorflowLiteModel/from_KerasModelHdf5/component.yaml'
implementation:
container:
image: tensorflow/tensorflow:2.3.0
command:
- sh
- -exc
- |
model_path="$0"
output_model_path="$1"
mkdir -p "$(dirname "$output_model_path")"
tflite_convert --keras_model_file "$model_path" --output_file "$output_model_path"
- {inputPath: Model}
- {outputPath: Model}

View File

@ -1,23 +0,0 @@
name: Convert Tensorflow SavedModel to Tensorflow Lite model
inputs:
- {name: Model, type: TensorflowSavedModel}
outputs:
- {name: Model, type: TensorflowLiteModel}
metadata:
annotations:
author: Alexey Volkov <alexey.volkov@ark-kun.com>
canonical_location: 'https://raw.githubusercontent.com/Ark-kun/pipeline_components/master/components/_converters/TensorflowLiteModel/from_TensorflowSavedModel/component.yaml'
implementation:
container:
image: tensorflow/tensorflow:2.3.0
command:
- sh
- -exc
- |
model_path="$0"
output_model_path="$1"
mkdir -p "$(dirname "$output_model_path")"
tflite_convert --saved_model_dir "$model_path" --output_file "$output_model_path"
- {inputPath: Model}
- {outputPath: Model}
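These conversions can also be driven from Python instead of the `tflite_convert` CLI; a sketch for the SavedModel case, assuming TensorFlow 2.x (paths are placeholders):
import tensorflow as tf
# Load the SavedModel and convert it to a TFLite flatbuffer
converter = tf.lite.TFLiteConverter.from_saved_model('saved_model_dir')
tflite_model = converter.convert()
with open('model.tflite', 'wb') as f:
    f.write(tflite_model)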

View File

@ -1,44 +0,0 @@
name: Calculate data hash
inputs:
- {name: Data}
- {name: Hash algorithm, type: String, default: SHA256, description: "Hash algorithm to use. Supported values are MD5, SHA1, SHA256, SHA512, SHA3"}
outputs:
- {name: Hash}
metadata:
annotations:
author: Alexey Volkov <alexey.volkov@ark-kun.com>
canonical_location: 'https://raw.githubusercontent.com/Ark-kun/pipeline_components/master/components/basics/Calculate_hash/component.yaml'
implementation:
container:
image: alpine
command:
- sh
- -exc
- |
data_path="$0"
hash_algorithm="$1"
hash_path="$2"
mkdir -p "$(dirname "$hash_path")"
hash_algorithm=$(echo "$hash_algorithm" | tr '[:upper:]' '[:lower:]')
case "$hash_algorithm" in
md5|sha1|sha256|sha512|sha3) hash_program="${hash_algorithm}sum";;
*) echo "Unsupported hash algorithm $hash_algorithm"; exit 1;;
esac
if [ -d "$data_path" ]; then
# Calculating hash for directory
cd "$data_path"
find . -type f -print0 |
sort -z |
xargs -0 "$hash_program" |
"$hash_program" |
cut -d ' ' -f 1 > "$hash_path"
else
# Calculating hash for file
"$hash_program" "$data_path" |
cut -d ' ' -f 1 > "$hash_path"
fi
- {inputPath: Data}
- {inputValue: Hash algorithm}
- {outputPath: Hash}
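For reference, an approximate Python restatement of the directory-hash recipe above (hash each file, sort the per-file digest lines, then hash the listing). Sorting and line formatting may differ slightly from the shell pipeline, so the digests are not guaranteed to match byte for byte:
import hashlib
from pathlib import Path
def directory_sha256(data_dir: str) -> str:
    root = Path(data_dir)
    lines = []
    # One line per file, in a deterministic order, like `find | sort | xargs sha256sum`
    for path in sorted(p for p in root.rglob('*') if p.is_file()):
        digest = hashlib.sha256(path.read_bytes()).hexdigest()
        lines.append('{}  ./{}\n'.format(digest, path.relative_to(root)))
    # Hash the listing of per-file hashes, like the final `| sha256sum`
    return hashlib.sha256(''.join(lines).encode()).hexdigest()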

View File

@ -1,3 +0,0 @@
# Deprecation Warning
The components in this directory have been moved to [components/contrib/basics](https://github.com/kubeflow/pipelines/tree/master/components/contrib/basics). This directory will be removed by the end of 2021.

View File

@ -1,59 +0,0 @@
#!/bin/bash -e
# Copyright 2018 The Kubeflow Authors
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
while getopts ":hp:t:i:l:" opt; do
case "${opt}" in
h) echo "-p: project name"
echo "-t: tag name"
echo "-i: image name. If provided, project name and tag name are not necessary"
echo "-l: local image name."
exit
;;
p) PROJECT_ID=${OPTARG}
;;
t) TAG_NAME=${OPTARG}
;;
i) IMAGE_NAME=${OPTARG}
;;
l) LOCAL_IMAGE_NAME=${OPTARG}
;;
\? ) echo "Usage: cmd [-p] project [-t] tag [-i] image [-l] local image"
exit
;;
esac
done
if [ -z "${PROJECT_ID}" ]; then
PROJECT_ID=$(gcloud config config-helper --format "value(configuration.properties.core.project)")
fi
if [ -z "${TAG_NAME}" ]; then
TAG_NAME=$(date +v%Y%m%d)-$(git describe --tags --always --dirty)-$(git diff | shasum -a256 | cut -c -6)
fi
if [ -z "${IMAGE_NAME}" ]; then
docker pull gcr.io/${PROJECT_ID}/${LOCAL_IMAGE_NAME}:latest || true
fi
docker build -t ${LOCAL_IMAGE_NAME} . --cache-from gcr.io/${PROJECT_ID}/${LOCAL_IMAGE_NAME}:latest
if [ -z "${IMAGE_NAME}" ]; then
docker tag ${LOCAL_IMAGE_NAME} gcr.io/${PROJECT_ID}/${LOCAL_IMAGE_NAME}:${TAG_NAME}
docker tag ${LOCAL_IMAGE_NAME} gcr.io/${PROJECT_ID}/${LOCAL_IMAGE_NAME}:latest
docker push gcr.io/${PROJECT_ID}/${LOCAL_IMAGE_NAME}:${TAG_NAME}
docker push gcr.io/${PROJECT_ID}/${LOCAL_IMAGE_NAME}:latest
else
docker tag ${LOCAL_IMAGE_NAME} "${IMAGE_NAME}"
docker push "${IMAGE_NAME}"
fi

View File

@ -1,3 +0,0 @@
# Deprecation Warning
The components in this directory have been moved to [components/contrib/dataset_manipulation](https://github.com/kubeflow/pipelines/tree/master/components/contrib/dataset_manipulation). This directory will be removed by the end of 2021.

View File

@ -1,90 +0,0 @@
from kfp.components import InputPath, OutputPath, create_component_from_func
def split_table_into_folds(
table_path: InputPath('CSV'),
train_1_path: OutputPath('CSV'),
train_2_path: OutputPath('CSV'),
train_3_path: OutputPath('CSV'),
train_4_path: OutputPath('CSV'),
train_5_path: OutputPath('CSV'),
test_1_path: OutputPath('CSV'),
test_2_path: OutputPath('CSV'),
test_3_path: OutputPath('CSV'),
test_4_path: OutputPath('CSV'),
test_5_path: OutputPath('CSV'),
number_of_folds: int = 5,
random_seed: int = 0,
):
"""Splits the data table into the specified number of folds.
The data is split into the specified number of folds k (default: 5).
Each testing subsample has 1/k fraction of samples. The testing subsamples do not overlap.
Each training subsample has (k-1)/k fraction of samples.
The train_i subsample is produced by excluding test_i subsample from all samples.
Inputs:
table: The data to split by rows
number_of_folds: Number of folds to split data into
random_seed: Random seed for reproducible splitting
Outputs:
train_i: The i-th training subsample
test_i: The i-th testing subsample
Annotations:
author: Alexey Volkov <alexey.volkov@ark-kun.com>
"""
import pandas
from sklearn import model_selection
max_number_of_folds = 5
if number_of_folds < 1 or number_of_folds > max_number_of_folds:
raise ValueError('Number of folds must be between 1 and {}.'.format(max_number_of_folds))
df = pandas.read_csv(
table_path,
)
splitter = model_selection.KFold(
n_splits=number_of_folds,
shuffle=True,
random_state=random_seed,
)
folds = list(splitter.split(df))
fold_paths = [
(train_1_path, test_1_path),
(train_2_path, test_2_path),
(train_3_path, test_3_path),
(train_4_path, test_4_path),
(train_5_path, test_5_path),
]
for i in range(max_number_of_folds):
(train_path, test_path) = fold_paths[i]
if i < len(folds):
(train_indices, test_indices) = folds[i]
train_fold = df.iloc[train_indices]
test_fold = df.iloc[test_indices]
else:
train_fold = df.iloc[0:0]
test_fold = df.iloc[0:0]
train_fold.to_csv(train_path, index=False)
test_fold.to_csv(test_path, index=False)
if __name__ == '__main__':
split_table_into_folds_op = create_component_from_func(
split_table_into_folds,
base_image='python:3.7',
packages_to_install=['scikit-learn==0.23.1', 'pandas==1.0.5'],
output_component_file='component.yaml',
annotations={
"author": "Alexey Volkov <alexey.volkov@ark-kun.com>",
"canonical_location": "https://raw.githubusercontent.com/Ark-kun/pipeline_components/master/components/dataset_manipulation/split_data_into_folds/in_CSV/component.yaml",
},
)
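A toy illustration of the splitting logic in isolation (synthetic data, not the component itself):
import pandas
from sklearn import model_selection
df = pandas.DataFrame({'x': range(10)})
splitter = model_selection.KFold(n_splits=5, shuffle=True, random_state=0)
for train_indices, test_indices in splitter.split(df):
    # Each fold holds out 1/5 of the rows; the rest form the training subsample
    print(len(train_indices), len(test_indices))  # prints "8 2" five times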

View File

@ -1,185 +0,0 @@
name: Split table into folds
description: |-
Splits the data table into the specified number of folds.
The data is split into the specified number of folds k (default: 5).
Each testing subsample has 1/k fraction of samples. The testing subsamples do not overlap.
Each training subsample has (k-1)/k fraction of samples.
The train_i subsample is produced by excluding test_i subsample from all samples.
Inputs:
table: The data to split by rows
number_of_folds: Number of folds to split data into
random_seed: Random seed for reproducible splitting
Outputs:
train_i: The i-th training subsample
test_i: The i-th testing subsample
Annotations:
author: Alexey Volkov <alexey.volkov@ark-kun.com>
metadata:
annotations:
author: Alexey Volkov <alexey.volkov@ark-kun.com>
canonical_location: 'https://raw.githubusercontent.com/Ark-kun/pipeline_components/master/components/dataset_manipulation/split_data_into_folds/in_CSV/component.yaml'
inputs:
- {name: table, type: CSV}
- {name: number_of_folds, type: Integer, default: '5', optional: true}
- {name: random_seed, type: Integer, default: '0', optional: true}
outputs:
- {name: train_1, type: CSV}
- {name: train_2, type: CSV}
- {name: train_3, type: CSV}
- {name: train_4, type: CSV}
- {name: train_5, type: CSV}
- {name: test_1, type: CSV}
- {name: test_2, type: CSV}
- {name: test_3, type: CSV}
- {name: test_4, type: CSV}
- {name: test_5, type: CSV}
implementation:
container:
image: python:3.7
command:
- sh
- -c
- (PIP_DISABLE_PIP_VERSION_CHECK=1 python3 -m pip install --quiet --no-warn-script-location
'scikit-learn==0.23.1' 'pandas==1.0.5' || PIP_DISABLE_PIP_VERSION_CHECK=1 python3
-m pip install --quiet --no-warn-script-location 'scikit-learn==0.23.1' 'pandas==1.0.5'
--user) && "$0" "$@"
- python3
- -u
- -c
- |
def _make_parent_dirs_and_return_path(file_path: str):
import os
os.makedirs(os.path.dirname(file_path), exist_ok=True)
return file_path
def split_table_into_folds(
table_path,
train_1_path,
train_2_path,
train_3_path,
train_4_path,
train_5_path,
test_1_path,
test_2_path,
test_3_path,
test_4_path,
test_5_path,
number_of_folds = 5,
random_seed = 0,
):
"""Splits the data table into the specified number of folds.
The data is split into the specified number of folds k (default: 5).
Each testing subsample has 1/k fraction of samples. The testing subsamples do not overlap.
Each training subsample has (k-1)/k fraction of samples.
The train_i subsample is produced by excluding test_i subsample from all samples.
Inputs:
table: The data to split by rows
number_of_folds: Number of folds to split data into
random_seed: Random seed for reproducible splitting
Outputs:
train_i: The i-th training subsample
test_i: The i-th testing subsample
Annotations:
author: Alexey Volkov <alexey.volkov@ark-kun.com>
"""
import pandas
from sklearn import model_selection
max_number_of_folds = 5
if number_of_folds < 1 or number_of_folds > max_number_of_folds:
raise ValueError('Number of folds must be between 1 and {}.'.format(max_number_of_folds))
df = pandas.read_csv(
table_path,
)
splitter = model_selection.KFold(
n_splits=number_of_folds,
shuffle=True,
random_state=random_seed,
)
folds = list(splitter.split(df))
fold_paths = [
(train_1_path, test_1_path),
(train_2_path, test_2_path),
(train_3_path, test_3_path),
(train_4_path, test_4_path),
(train_5_path, test_5_path),
]
for i in range(max_number_of_folds):
(train_path, test_path) = fold_paths[i]
if i < len(folds):
(train_indices, test_indices) = folds[i]
train_fold = df.iloc[train_indices]
test_fold = df.iloc[test_indices]
else:
train_fold = df.iloc[0:0]
test_fold = df.iloc[0:0]
train_fold.to_csv(train_path, index=False)
test_fold.to_csv(test_path, index=False)
import argparse
_parser = argparse.ArgumentParser(prog='Split table into folds', description='Splits the data table into the specified number of folds.\n\n The data is split into the specified number of folds k (default: 5).\n Each testing subsample has 1/k fraction of samples. The testing subsamples do not overlap.\n Each training subsample has (k-1)/k fraction of samples.\n The train_i subsample is produced by excluding test_i subsample from all samples.\n\n Inputs:\n table: The data to split by rows\n number_of_folds: Number of folds to split data into\n random_seed: Random seed for reproducible splitting\n\n Outputs:\n train_i: The i-th training subsample\n test_i: The i-th testing subsample\n\n Annotations:\n author: Alexey Volkov <alexey.volkov@ark-kun.com>')
_parser.add_argument("--table", dest="table_path", type=str, required=True, default=argparse.SUPPRESS)
_parser.add_argument("--number-of-folds", dest="number_of_folds", type=int, required=False, default=argparse.SUPPRESS)
_parser.add_argument("--random-seed", dest="random_seed", type=int, required=False, default=argparse.SUPPRESS)
_parser.add_argument("--train-1", dest="train_1_path", type=_make_parent_dirs_and_return_path, required=True, default=argparse.SUPPRESS)
_parser.add_argument("--train-2", dest="train_2_path", type=_make_parent_dirs_and_return_path, required=True, default=argparse.SUPPRESS)
_parser.add_argument("--train-3", dest="train_3_path", type=_make_parent_dirs_and_return_path, required=True, default=argparse.SUPPRESS)
_parser.add_argument("--train-4", dest="train_4_path", type=_make_parent_dirs_and_return_path, required=True, default=argparse.SUPPRESS)
_parser.add_argument("--train-5", dest="train_5_path", type=_make_parent_dirs_and_return_path, required=True, default=argparse.SUPPRESS)
_parser.add_argument("--test-1", dest="test_1_path", type=_make_parent_dirs_and_return_path, required=True, default=argparse.SUPPRESS)
_parser.add_argument("--test-2", dest="test_2_path", type=_make_parent_dirs_and_return_path, required=True, default=argparse.SUPPRESS)
_parser.add_argument("--test-3", dest="test_3_path", type=_make_parent_dirs_and_return_path, required=True, default=argparse.SUPPRESS)
_parser.add_argument("--test-4", dest="test_4_path", type=_make_parent_dirs_and_return_path, required=True, default=argparse.SUPPRESS)
_parser.add_argument("--test-5", dest="test_5_path", type=_make_parent_dirs_and_return_path, required=True, default=argparse.SUPPRESS)
_parsed_args = vars(_parser.parse_args())
_outputs = split_table_into_folds(**_parsed_args)
args:
- --table
- {inputPath: table}
- if:
cond: {isPresent: number_of_folds}
then:
- --number-of-folds
- {inputValue: number_of_folds}
- if:
cond: {isPresent: random_seed}
then:
- --random-seed
- {inputValue: random_seed}
- --train-1
- {outputPath: train_1}
- --train-2
- {outputPath: train_2}
- --train-3
- {outputPath: train_3}
- --train-4
- {outputPath: train_4}
- --train-5
- {outputPath: train_5}
- --test-1
- {outputPath: test_1}
- --test-2
- {outputPath: test_2}
- --test-3
- {outputPath: test_3}
- --test-4
- {outputPath: test_4}
- --test-5
- {outputPath: test_5}

View File

@ -1,43 +0,0 @@
name: Chicago Taxi Trips dataset
description: |
City of Chicago Taxi Trips dataset: https://data.cityofchicago.org/Transportation/Taxi-Trips/wrvz-psew
The input parameters configure the SQL query to the database.
The dataset is pretty big, so limit the number of results using the `Limit` or `Where` parameters.
Read [Socrata dev](https://dev.socrata.com/docs/queries/) for the advanced query syntax.
metadata:
annotations:
author: Alexey Volkov <alexey.volkov@ark-kun.com>
canonical_location: 'https://raw.githubusercontent.com/Ark-kun/pipeline_components/master/components/datasets/Chicago_Taxi_Trips/component.yaml'
inputs:
- {name: Where, type: String, default: 'trip_start_timestamp>="1900-01-01" AND trip_start_timestamp<"2100-01-01"'}
- {name: Limit, type: Integer, default: '1000', description: 'Number of rows to return. The rows are randomly sampled.'}
- {name: Select, type: String, default: 'trip_id,taxi_id,trip_start_timestamp,trip_end_timestamp,trip_seconds,trip_miles,pickup_census_tract,dropoff_census_tract,pickup_community_area,dropoff_community_area,fare,tips,tolls,extras,trip_total,payment_type,company,pickup_centroid_latitude,pickup_centroid_longitude,pickup_centroid_location,dropoff_centroid_latitude,dropoff_centroid_longitude,dropoff_centroid_location'}
- {name: Format, type: String, default: 'csv', description: 'Output data format. Supports csv, tsv, xml, rdf, json'}
outputs:
- {name: Table, description: 'Result type depends on format. CSV and TSV outputs include a header row.'}
implementation:
container:
# image: curlimages/curl # Sets a non-root user which cannot write to mounted volumes. See https://github.com/curl/curl-docker/issues/22
image: byrnedo/alpine-curl@sha256:548379d0a4a0c08b9e55d9d87a592b7d35d9ab3037f4936f5ccd09d0b625a342
command:
- sh
- -c
- |
set -e -x -o pipefail
output_path="$0"
select="$1"
where="$2"
limit="$3"
format="$4"
mkdir -p "$(dirname "$output_path")"
curl --get 'https://data.cityofchicago.org/resource/wrvz-psew.'"${format}" \
--data-urlencode '$limit='"${limit}" \
--data-urlencode '$where='"${where}" \
--data-urlencode '$select='"${select}" \
| tr -d '"' > "$output_path" # Removing unneeded quotes around all numbers
- {outputPath: Table}
- {inputValue: Select}
- {inputValue: Where}
- {inputValue: Limit}
- {inputValue: Format}
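For a quick sanity check outside the pipeline, the same Socrata query can be issued from Python; a sketch using the `requests` package (not used by the component itself; the filter values are examples):
import requests
response = requests.get(
    'https://data.cityofchicago.org/resource/wrvz-psew.csv',
    params={
        '$limit': 10,
        '$where': 'trip_start_timestamp>="2019-01-01"',
        '$select': 'trip_id,fare,tips',
    },
)
# The component strips the quotes that Socrata puts around numbers
print(response.text.replace('"', ''))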

View File

@ -1,30 +0,0 @@
from typing import NamedTuple
from kfp.components import create_component_from_func, OutputPath
def load_dataset_using_huggingface(
dataset_name: str,
dataset_dict_path: OutputPath('HuggingFaceDatasetDict'),
) -> NamedTuple('Outputs', [
('splits', list),
]):
from datasets import load_dataset
dataset_dict = load_dataset(dataset_name)
dataset_dict.save_to_disk(dataset_dict_path)
splits = list(dataset_dict.keys())
return (splits,)
if __name__ == '__main__':
load_dataset_op = create_component_from_func(
load_dataset_using_huggingface,
base_image='python:3.9',
packages_to_install=['datasets==1.6.2'],
annotations={
'author': 'Alexey Volkov <alexey.volkov@ark-kun.com>',
"canonical_location": "https://raw.githubusercontent.com/Ark-kun/pipeline_components/master/components/datasets/HuggingFace/Load_dataset/component.yaml",
},
output_component_file='component.yaml',
)
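A standalone sketch of what this component does, runnable outside the pipeline (the dataset name is an example):
from datasets import load_dataset
dataset_dict = load_dataset('imdb')       # downloads and caches the dataset
dataset_dict.save_to_disk('/tmp/imdb')    # same on-disk layout the component emits
print(list(dataset_dict.keys()))          # e.g. ['train', 'test', 'unsupervised']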

View File

@ -1,83 +0,0 @@
name: Load dataset using huggingface
metadata:
annotations:
author: Alexey Volkov <alexey.volkov@ark-kun.com>
canonical_location: 'https://raw.githubusercontent.com/Ark-kun/pipeline_components/master/components/datasets/HuggingFace/Load_dataset/component.yaml'
inputs:
- {name: dataset_name, type: String}
outputs:
- {name: dataset_dict, type: HuggingFaceDatasetDict}
- {name: splits, type: JsonArray}
implementation:
container:
image: python:3.9
command:
- sh
- -c
- (PIP_DISABLE_PIP_VERSION_CHECK=1 python3 -m pip install --quiet --no-warn-script-location
'datasets==1.6.2' || PIP_DISABLE_PIP_VERSION_CHECK=1 python3 -m pip install
--quiet --no-warn-script-location 'datasets==1.6.2' --user) && "$0" "$@"
- sh
- -ec
- |
program_path=$(mktemp)
printf "%s" "$0" > "$program_path"
python3 -u "$program_path" "$@"
- |
def _make_parent_dirs_and_return_path(file_path: str):
import os
os.makedirs(os.path.dirname(file_path), exist_ok=True)
return file_path
def load_dataset_using_huggingface(
dataset_name,
dataset_dict_path,
):
from datasets import load_dataset
dataset_dict = load_dataset(dataset_name)
dataset_dict.save_to_disk(dataset_dict_path)
splits = list(dataset_dict.keys())
return (splits,)
def _serialize_json(obj) -> str:
if isinstance(obj, str):
return obj
import json
def default_serializer(obj):
if hasattr(obj, 'to_struct'):
return obj.to_struct()
else:
raise TypeError("Object of type '%s' is not JSON serializable and does not have .to_struct() method." % obj.__class__.__name__)
return json.dumps(obj, default=default_serializer, sort_keys=True)
import argparse
_parser = argparse.ArgumentParser(prog='Load dataset using huggingface', description='')
_parser.add_argument("--dataset-name", dest="dataset_name", type=str, required=True, default=argparse.SUPPRESS)
_parser.add_argument("--dataset-dict", dest="dataset_dict_path", type=_make_parent_dirs_and_return_path, required=True, default=argparse.SUPPRESS)
_parser.add_argument("----output-paths", dest="_output_paths", type=str, nargs=1)
_parsed_args = vars(_parser.parse_args())
_output_files = _parsed_args.pop("_output_paths", [])
_outputs = load_dataset_using_huggingface(**_parsed_args)
_output_serializers = [
_serialize_json,
]
import os
for idx, output_file in enumerate(_output_files):
try:
os.makedirs(os.path.dirname(output_file))
except OSError:
pass
with open(output_file, 'w') as f:
f.write(_output_serializers[idx](_outputs[idx]))
args:
- --dataset-name
- {inputValue: dataset_name}
- --dataset-dict
- {outputPath: dataset_dict}
- '----output-paths'
- {outputPath: splits}
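To make the args mapping above concrete: `{inputValue: ...}` placeholders are replaced with literal argument values, while `{outputPath: ...}` placeholders become local paths chosen by the backend. A hedged sketch of the resulting argument vector (paths are illustrative):

# Roughly the argv that the launcher builds from the args section above.
argv = [
    '--dataset-name', 'imdb',                            # inputValue: dataset_name
    '--dataset-dict', '/tmp/outputs/dataset_dict/data',  # outputPath: dataset_dict
    '----output-paths', '/tmp/outputs/splits/data',      # outputPath: splits
]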

View File

@ -1,36 +0,0 @@
from kfp.components import create_component_from_func, InputPath, OutputPath


def split_dataset_huggingface(
dataset_dict_path: InputPath('HuggingFaceDatasetDict'),
dataset_split_path: OutputPath('HuggingFaceDataset'),
dataset_path: OutputPath('HuggingFaceArrowDataset'),
# dataset_indices_path: OutputPath('HuggingFaceArrowDataset'),
dataset_info_path: OutputPath(dict),
dataset_state_path: OutputPath(dict),
split_name: str = None,
):
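    # NOTE: split_name is annotated as optional, but the os.path.join calls
    # below raise a TypeError when it is None, so callers should always pass
    # an explicit split name.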
import os
import shutil
from datasets import config as datasets_config
print(f'DatasetDict contents: {os.listdir(dataset_dict_path)}')
shutil.copytree(os.path.join(dataset_dict_path, split_name), dataset_split_path)
print(f'Dataset contents: {os.listdir(os.path.join(dataset_dict_path, split_name))}')
shutil.copy(os.path.join(dataset_dict_path, split_name, datasets_config.DATASET_ARROW_FILENAME), dataset_path)
# shutil.copy(os.path.join(dataset_dict_path, split_name, datasets_config.DATASET_INDICES_FILENAME), dataset_indices_path)
shutil.copy(os.path.join(dataset_dict_path, split_name, datasets_config.DATASET_INFO_FILENAME), dataset_info_path)
shutil.copy(os.path.join(dataset_dict_path, split_name, datasets_config.DATASET_STATE_JSON_FILENAME), dataset_state_path)


if __name__ == '__main__':
split_dataset_op = create_component_from_func(
split_dataset_huggingface,
base_image='python:3.9',
packages_to_install=['datasets==1.6.2'],
annotations={
'author': 'Alexey Volkov <alexey.volkov@ark-kun.com>',
"canonical_location": "https://raw.githubusercontent.com/Ark-kun/pipeline_components/master/components/datasets/HuggingFace/Split_dataset/component.yaml",
},
output_component_file='component.yaml',
)

View File

@ -1,82 +0,0 @@
name: Split dataset huggingface
metadata:
annotations:
author: Alexey Volkov <alexey.volkov@ark-kun.com>
canonical_location: 'https://raw.githubusercontent.com/Ark-kun/pipeline_components/master/components/datasets/HuggingFace/Split_dataset/component.yaml'
inputs:
- {name: dataset_dict, type: HuggingFaceDatasetDict}
- {name: split_name, type: String, optional: true}
outputs:
- {name: dataset_split, type: HuggingFaceDataset}
- {name: dataset, type: HuggingFaceArrowDataset}
- {name: dataset_info, type: JsonObject}
- {name: dataset_state, type: JsonObject}
implementation:
container:
image: python:3.9
command:
- sh
- -c
- (PIP_DISABLE_PIP_VERSION_CHECK=1 python3 -m pip install --quiet --no-warn-script-location
'datasets==1.6.2' || PIP_DISABLE_PIP_VERSION_CHECK=1 python3 -m pip install
--quiet --no-warn-script-location 'datasets==1.6.2' --user) && "$0" "$@"
- sh
- -ec
- |
program_path=$(mktemp)
printf "%s" "$0" > "$program_path"
python3 -u "$program_path" "$@"
- |
def _make_parent_dirs_and_return_path(file_path: str):
import os
os.makedirs(os.path.dirname(file_path), exist_ok=True)
return file_path
def split_dataset_huggingface(
dataset_dict_path,
dataset_split_path,
dataset_path,
# dataset_indices_path: OutputPath('HuggingFaceArrowDataset'),
dataset_info_path,
dataset_state_path,
split_name = None,
):
import os
import shutil
from datasets import config as datasets_config
print(f'DatasetDict contents: {os.listdir(dataset_dict_path)}')
shutil.copytree(os.path.join(dataset_dict_path, split_name), dataset_split_path)
print(f'Dataset contents: {os.listdir(os.path.join(dataset_dict_path, split_name))}')
shutil.copy(os.path.join(dataset_dict_path, split_name, datasets_config.DATASET_ARROW_FILENAME), dataset_path)
# shutil.copy(os.path.join(dataset_dict_path, split_name, datasets_config.DATASET_INDICES_FILENAME), dataset_indices_path)
shutil.copy(os.path.join(dataset_dict_path, split_name, datasets_config.DATASET_INFO_FILENAME), dataset_info_path)
shutil.copy(os.path.join(dataset_dict_path, split_name, datasets_config.DATASET_STATE_JSON_FILENAME), dataset_state_path)
import argparse
_parser = argparse.ArgumentParser(prog='Split dataset huggingface', description='')
_parser.add_argument("--dataset-dict", dest="dataset_dict_path", type=str, required=True, default=argparse.SUPPRESS)
_parser.add_argument("--split-name", dest="split_name", type=str, required=False, default=argparse.SUPPRESS)
_parser.add_argument("--dataset-split", dest="dataset_split_path", type=_make_parent_dirs_and_return_path, required=True, default=argparse.SUPPRESS)
_parser.add_argument("--dataset", dest="dataset_path", type=_make_parent_dirs_and_return_path, required=True, default=argparse.SUPPRESS)
_parser.add_argument("--dataset-info", dest="dataset_info_path", type=_make_parent_dirs_and_return_path, required=True, default=argparse.SUPPRESS)
_parser.add_argument("--dataset-state", dest="dataset_state_path", type=_make_parent_dirs_and_return_path, required=True, default=argparse.SUPPRESS)
_parsed_args = vars(_parser.parse_args())
_outputs = split_dataset_huggingface(**_parsed_args)
args:
- --dataset-dict
- {inputPath: dataset_dict}
- if:
cond: {isPresent: split_name}
then:
- --split-name
- {inputValue: split_name}
- --dataset-split
- {outputPath: dataset_split}
- --dataset
- {outputPath: dataset}
- --dataset-info
- {outputPath: dataset_info}
- --dataset-state
- {outputPath: dataset_state}

View File

@ -1,24 +0,0 @@
from kfp import components
from kfp import dsl
load_dataset_op = components.load_component_from_url('https://raw.githubusercontent.com/kubeflow/pipelines/d0e14a1dad4b851ad2a60a0c1a8201493f3d931c/components/datasets/HuggingFace/Load_dataset/component.yaml')
split_dataset_op = components.load_component_from_url('https://raw.githubusercontent.com/kubeflow/pipelines/d0e14a1dad4b851ad2a60a0c1a8201493f3d931c/components/datasets/HuggingFace/Split_dataset/component.yaml')


def huggingface_pipeline():
dataset_dict_task = load_dataset_op(dataset_name='imdb')
with dsl.ParallelFor(dataset_dict_task.outputs['splits']) as split_name:
        dataset_task = split_dataset_op(
dataset_dict=dataset_dict_task.outputs['dataset_dict'],
split_name=split_name,
)


if __name__ == '__main__':
import kfp
kfp_endpoint = None
kfp.Client(host=kfp_endpoint).create_run_from_pipeline_func(
huggingface_pipeline,
arguments={}
)
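If no KFP endpoint is available, the same pipeline can be compiled into a package instead of being submitted as a run. A minimal sketch, assuming the v1 SDK used throughout this repository:

# Compile the pipeline to a portable package instead of creating a run.
import kfp
kfp.compiler.Compiler().compile(huggingface_pipeline, 'huggingface_pipeline.yaml')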

View File

@ -1,3 +0,0 @@
# Deprecation Warning
The components in this directory have been moved to [components/contrib/datasets](https://github.com/kubeflow/pipelines/tree/master/components/contrib/datasets). This directory will be removed by the end of 2021.

View File

@ -1,32 +0,0 @@
name: Predict using TF on Dataflow
description: |
Runs TensorFlow prediction on Google Cloud Dataflow
Input and output data is in GCS
inputs:
- {name: Data file pattern, type: GCSPath, description: 'GCS or local path of test file patterns.'} # type: {GCSPath: {data_type: CSV}}
- {name: Schema, type: GCSPath, description: 'GCS json schema file path.'} # type: {GCSPath: {data_type: TFDV schema JSON}}
- {name: Target column, type: String, description: 'Name of the column for prediction target.'}
- {name: Model, type: GCSPath, description: 'GCS or local path of model trained with tft preprocessed data.'} # Models trained with estimator are exported to base/export/export/123456781 directory. # Our trainer export only one model. #TODO: Output single model from trainer # type: {GCSPath: {path_type: Directory, data_type: Exported TensorFlow models dir}}
- {name: Batch size, type: Integer, default: '32', description: 'Batch size used in prediction.'}
- {name: Run mode, type: String, default: local, description: 'Whether to run the job locally or in Cloud Dataflow. Valid values are "local" and "cloud".'}
- {name: GCP project, type: GCPProjectID, description: 'The GCP project to run the dataflow job.'}
- {name: Predictions dir, type: GCSPath, description: 'GCS or local directory.'} #Will contain prediction_results-* and schema.json files; TODO: Split outputs and replace dir with single file # type: {GCSPath: {path_type: Directory}}
outputs:
- {name: Predictions dir, type: GCSPath, description: 'GCS or local directory.'} #Will contain prediction_results-* and schema.json files; TODO: Split outputs and replace dir with single file # type: {GCSPath: {path_type: Directory}}
- {name: MLPipeline UI metadata, type: UI metadata}
implementation:
container:
image: gcr.io/ml-pipeline/ml-pipeline-dataflow-tf-predict:57d9f7f1cfd458e945d297957621716062d89a49
command: [python2, /ml/predict.py]
args: [
--data, {inputValue: Data file pattern},
--schema, {inputValue: Schema},
--target, {inputValue: Target column},
--model, {inputValue: Model},
--mode, {inputValue: Run mode},
--project, {inputValue: GCP project},
--batchsize, {inputValue: Batch size},
--output, {inputValue: Predictions dir},
--prediction-results-uri-pattern-output-path, {outputPath: Predictions dir},
--ui-metadata-output-path, {outputPath: MLPipeline UI metadata},
]
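A hedged sketch of invoking this component from the KFP SDK; the file path and argument values are illustrative, and the keyword names assume kfp's usual input-name sanitization (lowercased, spaces replaced with underscores):

from kfp import components

tf_predict_op = components.load_component_from_file('component.yaml')  # illustrative path

def predict_pipeline():
    tf_predict_op(
        data_file_pattern='gs://my-bucket/test/*.csv',
        schema='gs://my-bucket/schema.json',
        target_column='label',
        model='gs://my-bucket/model/export/export/1234567890',
        batch_size=32,
        run_mode='local',
        gcp_project='my-project',
        predictions_dir='gs://my-bucket/predictions',
    )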

View File

@ -1,34 +0,0 @@
name: TFX - Data Validation
description: |
  Runs TensorFlow Data Validation. https://www.tensorflow.org/tfx/data_validation/get_started
  TensorFlow Data Validation (TFDV) can analyze training and serving data to:
* compute descriptive statistics,
* infer a schema,
* detect data anomalies.
inputs:
- {name: Inference data, type: GCSPath, description: GCS path of the CSV file from which to infer the schema.} # type: {GCSPath: {data_type: CSV}}
- {name: Validation data, type: GCSPath, description: GCS path of the CSV file whose contents should be validated.} # type: {GCSPath: {data_type: CSV}}
- {name: Column names, type: GCSPath, description: GCS json file containing a list of column names.} # type: {GCSPath: {data_type: JSON}}
- {name: Key columns, type: String, description: Comma separated list of columns to treat as keys.}
- {name: GCP project, type: GCPProjectID, default: '', description: The GCP project to run the dataflow job.}
- {name: Run mode, type: String, default: local, description: Whether to run the job locally or in Cloud Dataflow. Valid values are "local" and "cloud". }
- {name: Validation output, type: GCSPath, description: GCS or local directory.} # type: {GCSPath: {path_type: Directory}}
outputs:
- {name: Schema, type: GCSPath, description: GCS path of the inferred schema JSON.} # type: {GCSPath: {data_type: TFDV schema JSON}}
- {name: Validation result, type: String, description: Indicates whether anomalies were detected or not.}
implementation:
container:
image: gcr.io/ml-pipeline/ml-pipeline-dataflow-tfdv:57d9f7f1cfd458e945d297957621716062d89a49
command: [python2, /ml/validate.py]
args: [
--csv-data-for-inference, {inputValue: Inference data},
--csv-data-to-validate, {inputValue: Validation data},
--column-names, {inputValue: Column names},
--key-columns, {inputValue: Key columns},
--project, {inputValue: GCP project},
--mode, {inputValue: Run mode},
--output, {inputValue: Validation output},
]
fileOutputs:
Schema: /schema.txt
Validation result: /output_validation_result.txt
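A sketch of wiring this component into a pipeline and consuming its Schema output downstream; paths are illustrative, and the sanitized output key 'schema' is an assumption based on kfp's naming rules:

from kfp import components

tfdv_op = components.load_component_from_file('component.yaml')  # illustrative path

def validation_pipeline():
    validation = tfdv_op(
        inference_data='gs://my-bucket/train/*.csv',
        validation_data='gs://my-bucket/eval/*.csv',
        column_names='gs://my-bucket/column_names.json',
        key_columns='key',
        gcp_project='my-project',
        run_mode='local',
        validation_output='gs://my-bucket/validation',
    )
    schema_uri = validation.outputs['schema']  # feeds the transform/analysis steps below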

View File

@ -1,33 +0,0 @@
name: TFX - Analyze model
description: |
  Runs TensorFlow Model Analysis. https://www.tensorflow.org/tfx/model_analysis/get_started
  TensorFlow Model Analysis allows you to perform model evaluations in the TFX pipeline and view the resulting metrics and plots in a Jupyter notebook. Specifically, it can provide:
  * metrics computed on the entire training and holdout datasets, as well as next-day evaluations
* tracking metrics over time
* model quality performance on different feature slices
inputs:
- {name: Model, type: GCSPath, description: GCS path to the model which will be evaluated.} # type: {GCSPath: {path_type: Directory, data_type: Exported TensorFlow models dir}}
- {name: Evaluation data, type: GCSPath, description: GCS path of eval files.} # type: {GCSPath: {data_type: CSV}}
- {name: Schema, type: GCSPath, description: GCS json schema file path.} # type: {GCSPath: {data_type: TFDV schema JSON}}
- {name: Run mode, type: String, default: local, description: Whether to run the job locally or in Cloud Dataflow.}
- {name: GCP project, type: GCPProjectID, default: '', description: 'The GCP project to run the dataflow job, if running in the `cloud` mode.'}
- {name: Slice columns, type: String, description: Comma-separated list of columns on which to slice for analysis.}
- {name: Analysis results dir, type: GCSPath, description: GCS or local directory where the analysis results should be written.} # type: {GCSPath: {path_type: Directory}}
outputs:
- {name: Analysis results dir, type: GCSPath, description: GCS or local directory where the analysis results were written.} # type: {GCSPath: {path_type: Directory}}
- {name: MLPipeline UI metadata, type: UI metadata}
implementation:
container:
image: gcr.io/ml-pipeline/ml-pipeline-dataflow-tfma:57d9f7f1cfd458e945d297957621716062d89a49
command: [python2, /ml/model_analysis.py]
args: [
--model, {inputValue: Model},
--eval, {inputValue: Evaluation data},
--schema, {inputValue: Schema},
--mode, {inputValue: Run mode},
--project, {inputValue: GCP project},
--slice-columns, {inputValue: Slice columns},
--output, {inputValue: Analysis results dir},
--output-dir-uri-output-path, {outputPath: Analysis results dir},
--ui-metadata-output-path, {outputPath: MLPipeline UI metadata},
]
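Usage follows the same pattern as the components above; note that 'Slice columns' takes a comma-separated list of feature names. A hedged sketch with illustrative values:

from kfp import components

tfma_op = components.load_component_from_file('component.yaml')  # illustrative path

def analysis_pipeline():
    tfma_op(
        model='gs://my-bucket/model',
        evaluation_data='gs://my-bucket/eval/*.csv',
        schema='gs://my-bucket/schema.json',
        run_mode='local',
        gcp_project='my-project',
        slice_columns='trip_start_hour,company',  # metrics are computed per slice
        analysis_results_dir='gs://my-bucket/analysis',
    )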

View File

@ -1,26 +0,0 @@
name: Transform using TF on Dataflow
description: Runs TensorFlow Transform on Google Cloud Dataflow
inputs:
- {name: Training data file pattern, type: GCSPath, description: 'GCS path of train file patterns.'} #Also supports local CSV # type: {GCSPath: {data_type: CSV}}
- {name: Evaluation data file pattern, type: GCSPath, description: 'GCS path of eval file patterns.'} #Also supports local CSV # type: {GCSPath: {data_type: CSV}}
- {name: Schema, type: GCSPath, description: 'GCS json schema file path.'} # type: {GCSPath: {data_type: JSON}}
- {name: GCP project, type: GCPProjectID, description: 'The GCP project to run the dataflow job.'}
- {name: Run mode, type: String, default: local, description: 'Whether to run the job locally or in Cloud Dataflow. Valid values are "local" and "cloud".' }
- {name: Preprocessing module, type: GCSPath, default: '', description: 'GCS path to a python file defining "preprocess" and "get_feature_columns" functions.'} # type: {GCSPath: {data_type: Python}}
- {name: Transformed data dir, type: GCSPath, description: 'GCS or local directory'} #Also supports local paths # type: {GCSPath: {path_type: Directory}}
outputs:
- {name: Transformed data dir, type: GCSPath} # type: {GCSPath: {path_type: Directory}}
implementation:
container:
image: gcr.io/ml-pipeline/ml-pipeline-dataflow-tft:57d9f7f1cfd458e945d297957621716062d89a49
command: [python2, /ml/transform.py]
args: [
--train, {inputValue: Training data file pattern},
--eval, {inputValue: Evaluation data file pattern},
--schema, {inputValue: Schema},
--project, {inputValue: GCP project},
--mode, {inputValue: Run mode},
--preprocessing-module, {inputValue: Preprocessing module},
--output, {inputValue: Transformed data dir},
  --output-dir-uri-output-path, {outputPath: Transformed data dir},
]
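The Schema input here is typically the Schema output of the TFDV component above; a hedged sketch of that wiring (values illustrative):

from kfp import components

tft_op = components.load_component_from_file('component.yaml')  # illustrative path

def transform_pipeline(schema_uri: str):
    tft_op(
        training_data_file_pattern='gs://my-bucket/train/*.csv',
        evaluation_data_file_pattern='gs://my-bucket/eval/*.csv',
        schema=schema_uri,  # e.g. the 'schema' output of the TFDV step
        gcp_project='my-project',
        run_mode='local',
        preprocessing_module='gs://my-bucket/preprocessing.py',
        transformed_data_dir='gs://my-bucket/transformed',
    )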

View File

@ -1,85 +0,0 @@
from kfp.components import InputPath, OutputPath


def Evaluator(
examples_path: InputPath('Examples'),
evaluation_path: OutputPath('ModelEvaluation'),
blessing_path: OutputPath('ModelBlessing'),
model_path: InputPath('Model') = None,
baseline_model_path: InputPath('Model') = None,
schema_path: InputPath('Schema') = None,
eval_config: {'JsonObject': {'data_type': 'proto:tensorflow_model_analysis.EvalConfig'}} = None,
feature_slicing_spec: {'JsonObject': {'data_type': 'proto:tfx.components.evaluator.FeatureSlicingSpec'}} = None,
fairness_indicator_thresholds: list = None,
example_splits: str = None,
module_file: str = None,
module_path: str = None,
):
from tfx.components.evaluator.component import Evaluator as component_class
#Generated code
import os
import tempfile
from tensorflow.io import gfile
from google.protobuf import json_format, message
from tfx.types import channel_utils, artifact_utils
from tfx.components.base import base_executor
arguments = locals().copy()
component_class_args = {}
for name, execution_parameter in component_class.SPEC_CLASS.PARAMETERS.items():
argument_value = arguments.get(name, None)
if argument_value is None:
continue
parameter_type = execution_parameter.type
if isinstance(parameter_type, type) and issubclass(parameter_type, message.Message):
argument_value_obj = parameter_type()
json_format.Parse(argument_value, argument_value_obj)
else:
argument_value_obj = argument_value
component_class_args[name] = argument_value_obj
for name, channel_parameter in component_class.SPEC_CLASS.INPUTS.items():
artifact_path = arguments.get(name + '_uri') or arguments.get(name + '_path')
if artifact_path:
artifact = channel_parameter.type()
artifact.uri = artifact_path.rstrip('/') + '/' # Some TFX components require that the artifact URIs end with a slash
if channel_parameter.type.PROPERTIES and 'split_names' in channel_parameter.type.PROPERTIES:
# Recovering splits
subdirs = gfile.listdir(artifact_path)
# Workaround for https://github.com/tensorflow/tensorflow/issues/39167
subdirs = [subdir.rstrip('/') for subdir in subdirs]
split_names = [subdir.replace('Split-', '') for subdir in subdirs]
artifact.split_names = artifact_utils.encode_split_names(sorted(split_names))
component_class_args[name] = channel_utils.as_channel([artifact])
component_class_instance = component_class(**component_class_args)
input_dict = channel_utils.unwrap_channel_dict(component_class_instance.inputs.get_all())
output_dict = {}
exec_properties = component_class_instance.exec_properties
# Generating paths for output artifacts
for name, channel in component_class_instance.outputs.items():
artifact_path = arguments.get('output_' + name + '_uri') or arguments.get(name + '_path')
if artifact_path:
artifact = channel.type()
artifact.uri = artifact_path.rstrip('/') + '/' # Some TFX components require that the artifact URIs end with a slash
artifact_list = [artifact]
channel._artifacts = artifact_list
output_dict[name] = artifact_list
print('component instance: ' + str(component_class_instance))
executor_context = base_executor.BaseExecutor.Context(
beam_pipeline_args=arguments.get('beam_pipeline_args'),
tmp_dir=tempfile.gettempdir(),
unique_id='tfx_component',
)
executor = component_class_instance.executor_spec.executor_class(executor_context)
executor.Do(
input_dict=input_dict,
output_dict=output_dict,
exec_properties=exec_properties,
)
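Because eval_config is declared as a proto-typed JsonObject, callers supply a JSON string that the json_format.Parse call above loads into tensorflow_model_analysis.EvalConfig. A minimal sketch of such a payload (field names follow the TFMA proto; values are illustrative):

import json

eval_config = json.dumps({
    'model_specs': [{'label_key': 'label'}],
    'slicing_specs': [{}],  # an empty spec requests overall (unsliced) metrics
    'metrics_specs': [{'metrics': [{'class_name': 'ExampleCount'}]}],
})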

View File

@ -1,195 +0,0 @@
name: Evaluator
inputs:
- {name: examples, type: Examples}
- {name: model, type: Model, optional: true}
- {name: baseline_model, type: Model, optional: true}
- {name: schema, type: Schema, optional: true}
- name: eval_config
type:
JsonObject: {data_type: 'proto:tensorflow_model_analysis.EvalConfig'}
optional: true
- name: feature_slicing_spec
type:
JsonObject: {data_type: 'proto:tfx.components.evaluator.FeatureSlicingSpec'}
optional: true
- {name: fairness_indicator_thresholds, type: JsonArray, optional: true}
- {name: example_splits, type: String, optional: true}
- {name: module_file, type: String, optional: true}
- {name: module_path, type: String, optional: true}
outputs:
- {name: evaluation, type: ModelEvaluation}
- {name: blessing, type: ModelBlessing}
metadata:
annotations:
author: Alexey Volkov <alexey.volkov@ark-kun.com>
canonical_location: 'https://raw.githubusercontent.com/Ark-kun/pipeline_components/master/components/deprecated/tfx/Evaluator/component.yaml'
implementation:
container:
image: tensorflow/tfx:0.29.0
command:
- sh
- -ec
- |
program_path=$(mktemp)
printf "%s" "$0" > "$program_path"
python3 -u "$program_path" "$@"
- |
def _make_parent_dirs_and_return_path(file_path: str):
import os
os.makedirs(os.path.dirname(file_path), exist_ok=True)
return file_path
def Evaluator(
examples_path,
evaluation_path,
blessing_path,
model_path = None,
baseline_model_path = None,
schema_path = None,
eval_config = None,
feature_slicing_spec = None,
fairness_indicator_thresholds = None,
example_splits = None,
module_file = None,
module_path = None,
):
from tfx.components.evaluator.component import Evaluator as component_class
#Generated code
import os
import tempfile
from tensorflow.io import gfile
from google.protobuf import json_format, message
from tfx.types import channel_utils, artifact_utils
from tfx.components.base import base_executor
arguments = locals().copy()
component_class_args = {}
for name, execution_parameter in component_class.SPEC_CLASS.PARAMETERS.items():
argument_value = arguments.get(name, None)
if argument_value is None:
continue
parameter_type = execution_parameter.type
if isinstance(parameter_type, type) and issubclass(parameter_type, message.Message):
argument_value_obj = parameter_type()
json_format.Parse(argument_value, argument_value_obj)
else:
argument_value_obj = argument_value
component_class_args[name] = argument_value_obj
for name, channel_parameter in component_class.SPEC_CLASS.INPUTS.items():
artifact_path = arguments.get(name + '_uri') or arguments.get(name + '_path')
if artifact_path:
artifact = channel_parameter.type()
artifact.uri = artifact_path.rstrip('/') + '/' # Some TFX components require that the artifact URIs end with a slash
if channel_parameter.type.PROPERTIES and 'split_names' in channel_parameter.type.PROPERTIES:
# Recovering splits
subdirs = gfile.listdir(artifact_path)
# Workaround for https://github.com/tensorflow/tensorflow/issues/39167
subdirs = [subdir.rstrip('/') for subdir in subdirs]
split_names = [subdir.replace('Split-', '') for subdir in subdirs]
artifact.split_names = artifact_utils.encode_split_names(sorted(split_names))
component_class_args[name] = channel_utils.as_channel([artifact])
component_class_instance = component_class(**component_class_args)
input_dict = channel_utils.unwrap_channel_dict(component_class_instance.inputs.get_all())
output_dict = {}
exec_properties = component_class_instance.exec_properties
# Generating paths for output artifacts
for name, channel in component_class_instance.outputs.items():
artifact_path = arguments.get('output_' + name + '_uri') or arguments.get(name + '_path')
if artifact_path:
artifact = channel.type()
artifact.uri = artifact_path.rstrip('/') + '/' # Some TFX components require that the artifact URIs end with a slash
artifact_list = [artifact]
channel._artifacts = artifact_list
output_dict[name] = artifact_list
print('component instance: ' + str(component_class_instance))
executor_context = base_executor.BaseExecutor.Context(
beam_pipeline_args=arguments.get('beam_pipeline_args'),
tmp_dir=tempfile.gettempdir(),
unique_id='tfx_component',
)
executor = component_class_instance.executor_spec.executor_class(executor_context)
executor.Do(
input_dict=input_dict,
output_dict=output_dict,
exec_properties=exec_properties,
)
import json
import argparse
_parser = argparse.ArgumentParser(prog='Evaluator', description='')
_parser.add_argument("--examples", dest="examples_path", type=str, required=True, default=argparse.SUPPRESS)
_parser.add_argument("--model", dest="model_path", type=str, required=False, default=argparse.SUPPRESS)
_parser.add_argument("--baseline-model", dest="baseline_model_path", type=str, required=False, default=argparse.SUPPRESS)
_parser.add_argument("--schema", dest="schema_path", type=str, required=False, default=argparse.SUPPRESS)
_parser.add_argument("--eval-config", dest="eval_config", type=str, required=False, default=argparse.SUPPRESS)
_parser.add_argument("--feature-slicing-spec", dest="feature_slicing_spec", type=str, required=False, default=argparse.SUPPRESS)
_parser.add_argument("--fairness-indicator-thresholds", dest="fairness_indicator_thresholds", type=json.loads, required=False, default=argparse.SUPPRESS)
_parser.add_argument("--example-splits", dest="example_splits", type=str, required=False, default=argparse.SUPPRESS)
_parser.add_argument("--module-file", dest="module_file", type=str, required=False, default=argparse.SUPPRESS)
_parser.add_argument("--module-path", dest="module_path", type=str, required=False, default=argparse.SUPPRESS)
_parser.add_argument("--evaluation", dest="evaluation_path", type=_make_parent_dirs_and_return_path, required=True, default=argparse.SUPPRESS)
_parser.add_argument("--blessing", dest="blessing_path", type=_make_parent_dirs_and_return_path, required=True, default=argparse.SUPPRESS)
_parsed_args = vars(_parser.parse_args())
_outputs = Evaluator(**_parsed_args)
args:
- --examples
- {inputPath: examples}
- if:
cond: {isPresent: model}
then:
- --model
- {inputPath: model}
- if:
cond: {isPresent: baseline_model}
then:
- --baseline-model
- {inputPath: baseline_model}
- if:
cond: {isPresent: schema}
then:
- --schema
- {inputPath: schema}
- if:
cond: {isPresent: eval_config}
then:
- --eval-config
- {inputValue: eval_config}
- if:
cond: {isPresent: feature_slicing_spec}
then:
- --feature-slicing-spec
- {inputValue: feature_slicing_spec}
- if:
cond: {isPresent: fairness_indicator_thresholds}
then:
- --fairness-indicator-thresholds
- {inputValue: fairness_indicator_thresholds}
- if:
cond: {isPresent: example_splits}
then:
- --example-splits
- {inputValue: example_splits}
- if:
cond: {isPresent: module_file}
then:
- --module-file
- {inputValue: module_file}
- if:
cond: {isPresent: module_path}
then:
- --module-path
- {inputValue: module_path}
- --evaluation
- {outputPath: evaluation}
- --blessing
- {outputPath: blessing}

Some files were not shown because too many files have changed in this diff.