上接(1)
完成了数据预处理之后进行回归器的学习:
# Train an XGBoost regressor on (self.train_X, self.train_y) and store the
# predictions for self.test_X in self.test_y.
#
# Two modes, selected by self.find_best_param:
#   True  -> grid-search (4-fold CV, negated MSE) over depth / lr / n_estimators,
#            log the winning params to a text file, then refit with them.
#   False -> use a fixed, previously-found parameter set.
if self.model_name == "xgboost":
    if self.find_best_param:
        # Parameter grid for the exhaustive search.
        test_params = {
            'max_depth': [2, 3, 4, 5, 6],
            'learning_rate': [0.25, 0.5, 0.1, 0.3],
            'n_estimators': [10, 25, 50, 100, 200]
        }
        # greater_is_better=False: GridSearchCV maximizes the score, so MSE
        # is negated internally (best_score_ will therefore be negative).
        mean_squared_error_scorer = make_scorer(score_func=mean_squared_error, greater_is_better=False)
        gs = GridSearchCV(estimator=XGBRegressor(), param_grid=test_params, cv=4, verbose=2,
                          scoring=mean_squared_error_scorer, n_jobs=2)
        gs.fit(self.train_X, self.train_y)
        print('======xgboost==== Best Results ================')
        print('best params: {}, best score: {}'.format(gs.best_params_, gs.best_score_))
        print('=============== End ================')
        best_params = gs.best_params_
        # Append a timestamped record of this search so runs can be compared.
        # NOTE(review): filename contains a typo ("treament") — kept as-is to
        # preserve behavior; fixing it would change where results are written.
        with open("medical_treament_find_best_param.txt", 'a+') as fh:
            fh.write("{} {} best params: {} best score: {}\n".format(
                time.strftime('%Y-%m-%d %H:%M:%S', time.localtime()), self.model_name, gs.best_params_,
                gs.best_score_))
    else:
        # Previously discovered best parameters; skips the (slow) grid search.
        best_params = {'learning_rate': 0.1, 'max_depth': 3, 'n_estimators': 100}
    # Refit a fresh estimator on the full training set with the chosen
    # parameters (**-unpacking covers all keys, whichever branch produced them).
    clf = XGBRegressor(**best_params)
    clf.fit(self.train_X, self.train_y)
    self.test_y = clf.predict(self.test_X)
    print("train successfully!")
发现其实学习训练部分就是简单使用一下现成的库函数,没什么东西,主要工作还是在数据的预处理部分。
最终的结果如图所示:
除了个别点相差有点夸张之外,整体1200个预测结果还是可以接受的。