From 9e1ee205126e6b8bf78baf7e1fcdc3ea49cd9852 Mon Sep 17 00:00:00 2001
From: Richard Liu <ricliu@google.com>
Date: Mon, 14 Jan 2019 11:50:13 -0800
Subject: [PATCH 1/3] Fix xgboost for hp tuning

---
 xgboost_ames_housing/housing.py | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/xgboost_ames_housing/housing.py b/xgboost_ames_housing/housing.py
index 742a14b1..d1ad643c 100644
--- a/xgboost_ames_housing/housing.py
+++ b/xgboost_ames_housing/housing.py
@@ -30,7 +30,8 @@ def read_input(file_name, test_size=0.25):
 
   train_X, test_X, train_y, test_y = train_test_split(X.values,
                                                       y.values,
-                                                      test_size=test_size)
+                                                      test_size=test_size,
+                                                      shuffle=False)
 
   imputer = Imputer()
   train_X = imputer.fit_transform(train_X)
@@ -61,7 +62,7 @@ def train_model(train_X,
 def eval_model(model, test_X, test_y):
   """Evaluate the model performance."""
   predictions = model.predict(test_X)
-  print("MAE on test: {:.2f}".format(mean_absolute_error(predictions, test_y)))
+  print("mean_absolute_error={:.2f}".format(mean_absolute_error(predictions, test_y)))
 
 def save_model(model, model_file):
   """Save XGBoost model for serving."""

From 8437ec9e5c89771f06981bf0b0a3ad4b8b0d2e82 Mon Sep 17 00:00:00 2001
From: Richard Liu <ricliu@google.com>
Date: Mon, 14 Jan 2019 15:54:25 -0800
Subject: [PATCH 2/3] Fix logging

---
 xgboost_ames_housing/housing.py | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/xgboost_ames_housing/housing.py b/xgboost_ames_housing/housing.py
index d1ad643c..51c594ea 100644
--- a/xgboost_ames_housing/housing.py
+++ b/xgboost_ames_housing/housing.py
@@ -14,6 +14,7 @@
 
 import argparse
 import joblib
+import logging
 import pandas as pd
 from sklearn.metrics import mean_absolute_error
 from sklearn.model_selection import train_test_split
@@ -54,7 +55,7 @@ def train_model(train_X,
             early_stopping_rounds=40,
             eval_set=[(test_X, test_y)])
 
-  print("Best RMSE on eval: {:.2f} with {} rounds".format(
+  logging.info("Best RMSE on eval: {:.2f} with {} rounds".format(
                  model.best_score,
                  model.best_iteration+1))
   return model
@@ -62,12 +63,12 @@ def train_model(train_X,
 def eval_model(model, test_X, test_y):
   """Evaluate the model performance."""
   predictions = model.predict(test_X)
-  print("mean_absolute_error={:.2f}".format(mean_absolute_error(predictions, test_y)))
+  logging.info("mean_absolute_error={:.2f}".format(mean_absolute_error(predictions, test_y)))
 
 def save_model(model, model_file):
   """Save XGBoost model for serving."""
   joblib.dump(model, model_file)
-  print("Model export success {}".format(model_file))
+  logging.info("Model export success {}".format(model_file))
 
 def main(args):
   (train_X, train_y), (test_X, test_y) = read_input(args.train_input)

From 385956442234e15e761bd72f3402012ad40b45d9 Mon Sep 17 00:00:00 2001
From: Richard Liu <ricliu@google.com>
Date: Mon, 14 Jan 2019 17:01:27 -0800
Subject: [PATCH 3/3] Fix pylint and log fmt

---
 xgboost_ames_housing/housing.py | 14 ++++++++------
 1 file changed, 8 insertions(+), 6 deletions(-)

diff --git a/xgboost_ames_housing/housing.py b/xgboost_ames_housing/housing.py
index 51c594ea..06b212ff 100644
--- a/xgboost_ames_housing/housing.py
+++ b/xgboost_ames_housing/housing.py
@@ -13,8 +13,8 @@
 # limitations under the License.
 
 import argparse
-import joblib
 import logging
+import joblib
 import pandas as pd
 from sklearn.metrics import mean_absolute_error
 from sklearn.model_selection import train_test_split
@@ -55,20 +55,20 @@ def train_model(train_X,
             early_stopping_rounds=40,
             eval_set=[(test_X, test_y)])
 
-  logging.info("Best RMSE on eval: {:.2f} with {} rounds".format(
-                 model.best_score,
-                 model.best_iteration+1))
+  logging.info("Best RMSE on eval: %.2f with %d rounds",
+               model.best_score,
+               model.best_iteration+1)
   return model
 
 def eval_model(model, test_X, test_y):
   """Evaluate the model performance."""
   predictions = model.predict(test_X)
-  logging.info("mean_absolute_error={:.2f}".format(mean_absolute_error(predictions, test_y)))
+  logging.info("mean_absolute_error=%.2f", mean_absolute_error(predictions, test_y))
 
 def save_model(model, model_file):
   """Save XGBoost model for serving."""
   joblib.dump(model, model_file)
-  logging.info("Model export success {}".format(model_file))
+  logging.info("Model export success: %s", model_file)
 
 def main(args):
   (train_X, train_y), (test_X, test_y) = read_input(args.train_input)
@@ -117,5 +117,7 @@ if __name__ == '__main__':
           default=50
           )
 
+  logging.basicConfig(format='%(message)s')
+  logging.getLogger().setLevel(logging.INFO)
   main_args = parser.parse_args()
   main(main_args)