95 lines
3.0 KiB
Python
95 lines
3.0 KiB
Python
from typing import NamedTuple
|
|
from kfp.components import create_component_from_func, InputPath, OutputPath
|
|
|
|
def keras_train_classifier_from_csv(
    training_features_path: InputPath('CSV'),
    training_labels_path: InputPath('CSV'),
    network_json_path: InputPath('KerasModelJson'),
    model_path: OutputPath('KerasModelHdf5'),
    loss_name: str = 'categorical_crossentropy',
    num_classes: int = None,
    optimizer: str = 'rmsprop',
    optimizer_config: dict = None,
    learning_rate: float = 0.01,
    num_epochs: int = 100,
    batch_size: int = 32,
    # NOTE: list default kept as-is for interface compatibility; it is only
    # passed through to model.compile() and never modified by this function.
    metrics: list = ['accuracy'],
    random_seed: int = 0,
) -> NamedTuple('Outputs', [
    ('final_loss', float),
    ('final_metrics', dict),
    ('metrics_history', dict),
]):
    '''Trains classifier model using Keras.

    Reads features and labels from CSV files, loads a Keras model
    architecture from JSON, appends a softmax activation layer, compiles the
    model with the requested loss/optimizer/metrics, fits it and saves the
    trained model to `model_path`.

    Args:
        training_features_path: Path to the CSV file with training features.
            It is read with `pandas.read_csv` and converted to a numpy array.
        training_labels_path: Path to the CSV file with training labels.
            The values are one-hot encoded with `keras.utils.to_categorical`
            (presumably integer class indices - verify against the data).
        network_json_path: Path to the JSON file describing the Keras model.
            The loaded model must support `.add()`, because a softmax
            activation layer is appended to it.
        model_path: Path where the trained model is written by `model.save`.
        loss_name: Loss passed to `model.compile`.
        num_classes: Number of classes passed to `to_categorical`.
            `None` is forwarded unchanged.
        optimizer: Optimizer class name handed to
            `keras.optimizers.deserialize`.
        optimizer_config: Optional optimizer configuration. The caller's dict
            is not modified; `learning_rate` is set on a copy.
        learning_rate: Learning rate. Always overrides any `learning_rate`
            entry present in `optimizer_config`.
        num_epochs: Number of epochs passed to `model.fit`.
        batch_size: Batch size passed to `model.fit`.
        metrics: Metrics passed to `model.compile`.
        random_seed: Seed applied to both TensorFlow and numpy.

    Returns:
        A tuple `(final_loss, final_metrics, metrics_history)`:
        `metrics_history` maps every name in the Keras training history to a
        list of floats, `final_metrics` maps every such name to the last value
        of that list (so it includes 'loss'), and `final_loss` is
        `final_metrics['loss']`.

    Annotations:
        author: Alexey Volkov <alexey.volkov@ark-kun.com>
    '''
    # Imports are kept inside the function so that the function stays
    # self-contained when it is packaged as a component.
    from pathlib import Path

    import keras
    import numpy
    import pandas
    import tensorflow

    # Seed both random number generators before any data or model is created.
    tensorflow.random.set_seed(random_seed)
    numpy.random.seed(random_seed)

    training_features_df = pandas.read_csv(training_features_path)
    training_labels_df = pandas.read_csv(training_labels_path)

    x_train = training_features_df.to_numpy()
    y_train_labels = training_labels_df.to_numpy()
    print('Training features shape:', x_train.shape)
    print('Numer of training samples:', x_train.shape[0])

    # Convert class vectors to binary class matrices.
    y_train_one_hot = keras.utils.to_categorical(y_train_labels, num_classes)

    model_json_str = Path(network_json_path).read_text()
    model = keras.models.model_from_json(model_json_str)

    # The network definition is expected to end without an activation;
    # the softmax is added here.
    model.add(keras.layers.Activation('softmax'))

    # Initializing the optimizer.
    # Work on a shallow copy so that the dict passed in by the caller is not
    # mutated when the learning rate is injected.
    optimizer_config = dict(optimizer_config or {})
    optimizer_config['learning_rate'] = learning_rate
    optimizer = keras.optimizers.deserialize({
        'class_name': optimizer,
        'config': optimizer_config,
    })

    model.compile(
        loss=loss_name,
        optimizer=optimizer,
        metrics=metrics,
    )

    history = model.fit(
        x_train,
        y_train_one_hot,
        batch_size=batch_size,
        epochs=num_epochs,
        shuffle=True
    )

    model.save(model_path)

    # Convert the recorded values to plain floats so the outputs are
    # ordinary Python numbers.
    metrics_history = {name: [float(value) for value in values] for name, values in history.history.items()}
    final_metrics = {name: values[-1] for name, values in metrics_history.items()}
    final_loss = final_metrics['loss']
    return (final_loss, final_metrics, metrics_history)
|
|
|
|
|
|
if __name__ == '__main__':
    # When run as a script, build the component from the training function
    # and write its definition to component.yaml.
    keras_train_classifier_from_csv_op = create_component_from_func(
        keras_train_classifier_from_csv,
        output_component_file='component.yaml',
        base_image='tensorflow/tensorflow:2.2.0',
        packages_to_install=[
            'keras==2.3.1',
            'pandas==1.0.5',
        ],
        annotations=dict(
            author='Alexey Volkov <alexey.volkov@ark-kun.com>',
            canonical_location='https://raw.githubusercontent.com/Ark-kun/pipeline_components/master/components/keras/Train_classifier/from_CSV/component.yaml',
        ),
    )
|