from kfp import components chicago_taxi_dataset_op = components.load_component_from_url('https://raw.githubusercontent.com/kubeflow/pipelines/e3337b8bdcd63636934954e592d4b32c95b49129/components/datasets/Chicago%20Taxi/component.yaml') pandas_transform_csv_op = components.load_component_from_url('https://raw.githubusercontent.com/kubeflow/pipelines/6162d55998b176b50267d351241100bb0ee715bc/components/pandas/Transform_DataFrame/in_CSV_format/component.yaml') download_op = components.load_component_from_url('https://raw.githubusercontent.com/kubeflow/pipelines/240543e483076ae718f82c6f280441daa2f041fd/components/web/Download/component.yaml') create_fully_connected_pytorch_network_op = components.load_component_from_url('https://raw.githubusercontent.com/kubeflow/pipelines/4e1facea1a270535b515a9e8cc59422d1ad76a9e/components/PyTorch/Create_fully_connected_network/component.yaml') train_pytorch_model_from_csv_op = components.load_component_from_url('https://raw.githubusercontent.com/kubeflow/pipelines/603342c4b88fe2d69ff07682f702cd3601e883bb/components/PyTorch/Train_PyTorch_model/from_CSV/component.yaml') convert_to_onnx_from_pytorch_script_module_op = components.load_component_from_url('https://raw.githubusercontent.com/kubeflow/pipelines/e011e4affa85542ef2b24d63fdac27f8d939bbee/components/PyTorch/Convert_to_OnnxModel_from_PyTorchScriptModule/component.yaml') create_pytorch_model_archive_op = components.load_component_from_url('https://raw.githubusercontent.com/kubeflow/pipelines/abc180be2b2b5538d19eb87124684629ec45e620/components/PyTorch/Create_PyTorch_Model_Archive/component.yaml') def pytorch_pipeline(): feature_columns = ['trip_seconds', 'trip_miles', 'pickup_community_area', 'dropoff_community_area', 'fare', 'tolls', 'extras'] # Excluded 'trip_total' label_column = 'tips' network = create_fully_connected_pytorch_network_op( layer_sizes=[len(feature_columns), 100, 10, 1], activation_name='elu', ).output training_data = chicago_taxi_dataset_op( where='trip_start_timestamp >= "2019-01-01" AND trip_start_timestamp < "2019-02-01"', select=','.join([label_column] + feature_columns), limit=10000, ).output training_data = pandas_transform_csv_op( table=training_data, transform_code='''df = df.fillna({'tolls': 0.0, 'extras': 0.0}); df = df.dropna(axis='index')''', ).output trained_model = train_pytorch_model_from_csv_op( model=network, training_data=training_data, label_column_name=label_column, loss_function_name='mse_loss', # Optional: batch_size=32, number_of_epochs=2, random_seed=0, learning_rate=0.1, optimizer_name='Adadelta', optimizer_parameters={}, ).outputs['trained_model'] convert_to_onnx_from_pytorch_script_module_op( model=trained_model, list_of_input_shapes=[[len(feature_columns)]], ) # TODO: Use a real working regression handler here. See https://github.com/pytorch/serve/issues/987 serving_handler = download_op('https://raw.githubusercontent.com/pytorch/serve/5c03e711a401387a1d42fc01072fcc38b4995b66/ts/torch_handler/base_handler.py').output model_archive = create_pytorch_model_archive_op( model=trained_model, handler=serving_handler, # model_name="model", # Optional # model_version="1.0", # Optional ).output if __name__ == '__main__': import kfp kfp_endpoint=None kfp.Client(host=kfp_endpoint).create_run_from_pipeline_func( pytorch_pipeline, arguments={}, )