pipelines/components/keras/Train_classifier/from_CSV/component.py

95 lines
3.0 KiB
Python

from typing import NamedTuple
from kfp.components import create_component_from_func, InputPath, OutputPath
def keras_train_classifier_from_csv(
    training_features_path: InputPath('CSV'),
    training_labels_path: InputPath('CSV'),
    network_json_path: InputPath('KerasModelJson'),
    model_path: OutputPath('KerasModelHdf5'),
    loss_name: str = 'categorical_crossentropy',
    num_classes: int = None,
    optimizer: str = 'rmsprop',
    optimizer_config: dict = None,
    learning_rate: float = 0.01,
    num_epochs: int = 100,
    batch_size: int = 32,
    metrics: list = ['accuracy'],
    random_seed: int = 0,
) -> NamedTuple('Outputs', [
    ('final_loss', float),
    ('final_metrics', dict),
    ('metrics_history', dict),
]):
    '''Trains classifier model using Keras.

    Annotations:
        author: Alexey Volkov <alexey.volkov@ark-kun.com>

    Args:
        training_features_path: Path of the CSV file with the training
            features. It is read with pandas.read_csv and every column is
            used as a feature.
        training_labels_path: Path of the CSV file with the training labels.
            The values are one-hot encoded with keras.utils.to_categorical,
            so they are presumably integer class indices (confirm against
            the data producer).
        network_json_path: Path of the file holding the JSON model
            description, loaded with keras.models.model_from_json. A softmax
            activation layer is appended to the loaded model.
        model_path: Path the trained model is saved to.
        loss_name: Loss passed to model.compile.
        num_classes: Number of classes, forwarded to
            keras.utils.to_categorical.
        optimizer: Optimizer class name passed to keras.optimizers.deserialize.
        optimizer_config: Optional optimizer configuration. Its
            'learning_rate' entry is always replaced by learning_rate. The
            passed dict itself is not modified.
        learning_rate: Learning rate written into the optimizer configuration.
        num_epochs: Number of epochs passed to model.fit.
        batch_size: Batch size passed to model.fit.
        metrics: Metrics passed to model.compile.
        random_seed: Seed applied to both TensorFlow and NumPy.

    Returns:
        A (final_loss, final_metrics, metrics_history) tuple.
        metrics_history maps every name recorded in the training history to
        its list of per-epoch float values, final_metrics maps every name to
        its last recorded value and final_loss is final_metrics['loss'].
    '''
    # Imports are kept function-local - presumably so the function stays
    # self-contained when packaged by create_component_from_func.
    from pathlib import Path

    import keras
    import numpy
    import pandas
    import tensorflow

    # Seed both random number generators before any data or model is created.
    tensorflow.random.set_seed(random_seed)
    numpy.random.seed(random_seed)

    training_features_df = pandas.read_csv(training_features_path)
    training_labels_df = pandas.read_csv(training_labels_path)

    x_train = training_features_df.to_numpy()
    y_train_labels = training_labels_df.to_numpy()
    print('Training features shape:', x_train.shape)
    print('Numer of training samples:', x_train.shape[0])

    # Convert class vectors to binary class matrices.
    y_train_one_hot = keras.utils.to_categorical(y_train_labels, num_classes)

    model_json_str = Path(network_json_path).read_text()
    model = keras.models.model_from_json(model_json_str)
    # NOTE(review): model.add presumably requires the loaded network to be a
    # Sequential model - confirm against the component producing the JSON.
    model.add(keras.layers.Activation('softmax'))

    # Initializing the optimizer.
    # Work on a copy so the dict supplied by the caller is never mutated;
    # the explicit learning_rate argument always overrides the config entry.
    optimizer_settings = dict(optimizer_config or {})
    optimizer_settings['learning_rate'] = learning_rate
    optimizer_instance = keras.optimizers.deserialize({
        'class_name': optimizer,
        'config': optimizer_settings,
    })

    model.compile(
        loss=loss_name,
        optimizer=optimizer_instance,
        metrics=metrics,
    )

    history = model.fit(
        x_train,
        y_train_one_hot,
        batch_size=batch_size,
        epochs=num_epochs,
        shuffle=True,
    )

    model.save(model_path)

    # Cast every recorded value to a built-in float.
    metrics_history = {
        name: [float(value) for value in values]
        for name, values in history.history.items()
    }
    # The last entry of each series is the value after the final epoch.
    final_metrics = {name: values[-1] for name, values in metrics_history.items()}
    final_loss = final_metrics['loss']
    return (final_loss, final_metrics, metrics_history)
if __name__ == '__main__':
    # Runs only when this file is executed as a script: builds the component
    # from the training function and names 'component.yaml' as the output
    # component file.
    component_packages = ['keras==2.3.1', 'pandas==1.0.5']
    component_annotations = {
        'author': 'Alexey Volkov <alexey.volkov@ark-kun.com>',
        'canonical_location': 'https://raw.githubusercontent.com/Ark-kun/pipeline_components/master/components/keras/Train_classifier/from_CSV/component.yaml',
    }
    keras_train_classifier_from_csv_op = create_component_from_func(
        keras_train_classifier_from_csv,
        output_component_file='component.yaml',
        base_image='tensorflow/tensorflow:2.2.0',
        packages_to_install=component_packages,
        annotations=component_annotations,
    )