From 9e1ee205126e6b8bf78baf7e1fcdc3ea49cd9852 Mon Sep 17 00:00:00 2001 From: Richard Liu Date: Mon, 14 Jan 2019 11:50:13 -0800 Subject: [PATCH 1/3] Fix xgboost for hp tuning --- xgboost_ames_housing/housing.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/xgboost_ames_housing/housing.py b/xgboost_ames_housing/housing.py index 742a14b1..d1ad643c 100644 --- a/xgboost_ames_housing/housing.py +++ b/xgboost_ames_housing/housing.py @@ -30,7 +30,8 @@ def read_input(file_name, test_size=0.25): train_X, test_X, train_y, test_y = train_test_split(X.values, y.values, - test_size=test_size) + test_size=test_size, + shuffle=False) imputer = Imputer() train_X = imputer.fit_transform(train_X) @@ -61,7 +62,7 @@ def train_model(train_X, def eval_model(model, test_X, test_y): """Evaluate the model performance.""" predictions = model.predict(test_X) - print("MAE on test: {:.2f}".format(mean_absolute_error(predictions, test_y))) + print("mean_absolute_error={:.2f}".format(mean_absolute_error(predictions, test_y))) def save_model(model, model_file): """Save XGBoost model for serving.""" From 8437ec9e5c89771f06981bf0b0a3ad4b8b0d2e82 Mon Sep 17 00:00:00 2001 From: Richard Liu Date: Mon, 14 Jan 2019 15:54:25 -0800 Subject: [PATCH 2/3] Fix logging --- xgboost_ames_housing/housing.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/xgboost_ames_housing/housing.py b/xgboost_ames_housing/housing.py index d1ad643c..51c594ea 100644 --- a/xgboost_ames_housing/housing.py +++ b/xgboost_ames_housing/housing.py @@ -14,6 +14,7 @@ import argparse import joblib +import logging import pandas as pd from sklearn.metrics import mean_absolute_error from sklearn.model_selection import train_test_split @@ -54,7 +55,7 @@ def train_model(train_X, early_stopping_rounds=40, eval_set=[(test_X, test_y)]) - print("Best RMSE on eval: {:.2f} with {} rounds".format( + logging.info("Best RMSE on eval: {:.2f} with {} rounds".format( model.best_score, model.best_iteration+1)) return model @@ -62,12 +63,12 @@ def train_model(train_X, def eval_model(model, test_X, test_y): """Evaluate the model performance.""" predictions = model.predict(test_X) - print("mean_absolute_error={:.2f}".format(mean_absolute_error(predictions, test_y))) + logging.info("mean_absolute_error={:.2f}".format(mean_absolute_error(predictions, test_y))) def save_model(model, model_file): """Save XGBoost model for serving.""" joblib.dump(model, model_file) - print("Model export success {}".format(model_file)) + logging.info("Model export success {}".format(model_file)) def main(args): (train_X, train_y), (test_X, test_y) = read_input(args.train_input) From 385956442234e15e761bd72f3402012ad40b45d9 Mon Sep 17 00:00:00 2001 From: Richard Liu Date: Mon, 14 Jan 2019 17:01:27 -0800 Subject: [PATCH 3/3] Fix pylint and log fmt --- xgboost_ames_housing/housing.py | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/xgboost_ames_housing/housing.py b/xgboost_ames_housing/housing.py index 51c594ea..06b212ff 100644 --- a/xgboost_ames_housing/housing.py +++ b/xgboost_ames_housing/housing.py @@ -13,8 +13,8 @@ # limitations under the License. import argparse -import joblib import logging +import joblib import pandas as pd from sklearn.metrics import mean_absolute_error from sklearn.model_selection import train_test_split @@ -55,20 +55,20 @@ def train_model(train_X, early_stopping_rounds=40, eval_set=[(test_X, test_y)]) - logging.info("Best RMSE on eval: {:.2f} with {} rounds".format( - model.best_score, - model.best_iteration+1)) + logging.info("Best RMSE on eval: %.2f with %d rounds", + model.best_score, + model.best_iteration+1) return model def eval_model(model, test_X, test_y): """Evaluate the model performance.""" predictions = model.predict(test_X) - logging.info("mean_absolute_error={:.2f}".format(mean_absolute_error(predictions, test_y))) + logging.info("mean_absolute_error=%.2f", mean_absolute_error(predictions, test_y)) def save_model(model, model_file): """Save XGBoost model for serving.""" joblib.dump(model, model_file) - logging.info("Model export success {}".format(model_file)) + logging.info("Model export success: %s", model_file) def main(args): (train_X, train_y), (test_X, test_y) = read_input(args.train_input) @@ -117,5 +117,7 @@ if __name__ == '__main__': default=50 ) + logging.basicConfig(format='%(message)s') + logging.getLogger().setLevel(logging.INFO) main_args = parser.parse_args() main(main_args)