Merge pull request #476 from richardsliu/hp_tuning

Fix xgboost example for hyperparameter tuning
This commit is contained in:
Richard Liu 2019-01-14 17:41:07 -08:00 committed by GitHub
commit 64c3889071
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
1 changed file with 10 additions and 6 deletions

View File

@@ -13,6 +13,7 @@
# limitations under the License. # limitations under the License.
import argparse import argparse
import logging
import joblib import joblib
import pandas as pd import pandas as pd
from sklearn.metrics import mean_absolute_error from sklearn.metrics import mean_absolute_error
@@ -30,7 +31,8 @@ def read_input(file_name, test_size=0.25):
train_X, test_X, train_y, test_y = train_test_split(X.values, train_X, test_X, train_y, test_y = train_test_split(X.values,
y.values, y.values,
test_size=test_size) test_size=test_size,
shuffle=False)
imputer = Imputer() imputer = Imputer()
train_X = imputer.fit_transform(train_X) train_X = imputer.fit_transform(train_X)
@@ -53,20 +55,20 @@ def train_model(train_X,
early_stopping_rounds=40, early_stopping_rounds=40,
eval_set=[(test_X, test_y)]) eval_set=[(test_X, test_y)])
print("Best RMSE on eval: {:.2f} with {} rounds".format( logging.info("Best RMSE on eval: %.2f with %d rounds",
model.best_score, model.best_score,
model.best_iteration+1)) model.best_iteration+1)
return model return model
def eval_model(model, test_X, test_y): def eval_model(model, test_X, test_y):
"""Evaluate the model performance.""" """Evaluate the model performance."""
predictions = model.predict(test_X) predictions = model.predict(test_X)
print("MAE on test: {:.2f}".format(mean_absolute_error(predictions, test_y))) logging.info("mean_absolute_error=%.2f", mean_absolute_error(predictions, test_y))
def save_model(model, model_file): def save_model(model, model_file):
"""Save XGBoost model for serving.""" """Save XGBoost model for serving."""
joblib.dump(model, model_file) joblib.dump(model, model_file)
print("Model export success {}".format(model_file)) logging.info("Model export success: %s", model_file)
def main(args): def main(args):
(train_X, train_y), (test_X, test_y) = read_input(args.train_input) (train_X, train_y), (test_X, test_y) = read_input(args.train_input)
@@ -115,5 +117,7 @@ if __name__ == '__main__':
default=50 default=50
) )
logging.basicConfig(format='%(message)s')
logging.getLogger().setLevel(logging.INFO)
main_args = parser.parse_args() main_args = parser.parse_args()
main(main_args) main(main_args)