XGBoost Parameter Tuning
Author: unattributed (collected from the Internet)
Reposted from: https://segmentfault.com/a/1190000014040317
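The snippets below assume a feature matrix x_data and a target vector y_data are already loaded; the original post never shows this step. As a minimal, hypothetical setup, sklearn's make_regression can stand in for a real dataset:

from sklearn.datasets import make_regression

# Placeholder data only -- substitute your own dataset here.
x_data, y_data = make_regression(n_samples=1000, n_features=20,
                                 noise=0.1, random_state=0)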
1. Tune the number of boosting iterations, n_estimators
# Find the best number of boosting iterations: n_estimators
from xgboost import XGBRegressor
from sklearn.model_selection import GridSearchCV

cv_params = {'n_estimators': [20, 30, 40]}
# Values in cv_params override the matching key in other_params during the
# search, so n_estimators=500 below is only a placeholder default.
# 'seed' is the legacy alias; newer xgboost releases prefer 'random_state'.
other_params = {'learning_rate': 0.1, 'n_estimators': 500, 'max_depth': 5,
                'min_child_weight': 1, 'seed': 0, 'subsample': 0.8,
                'colsample_bytree': 0.8, 'gamma': 0, 'reg_alpha': 0, 'reg_lambda': 1}
model = XGBRegressor(**other_params)
optimized_GBM = GridSearchCV(estimator=model, param_grid=cv_params,
                             scoring='r2', cv=3, verbose=1, n_jobs=-1)
optimized_GBM.fit(x_data, y_data)
# GridSearchCV has no attribute return_train_score; the per-candidate
# results live in cv_results_.
evaluate_result = optimized_GBM.cv_results_
print('CV results per candidate: {0}'.format(evaluate_result))
print('Best parameter values: {0}'.format(optimized_GBM.best_params_))
print('Best model score: {0}'.format(optimized_GBM.best_score_))
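The raw cv_results_ dict is dense and hard to read when printed. A common way to inspect per-candidate scores (assuming pandas is available; this is not part of the original post) is to load it into a DataFrame:

import pandas as pd

# Mean and spread of the CV score for each n_estimators candidate.
results = pd.DataFrame(optimized_GBM.cv_results_)
print(results[['param_n_estimators', 'mean_test_score', 'std_test_score']])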
2. Tune min_child_weight and max_depth:
# Tune min_child_weight and max_depth
# (n_estimators is now fixed at the value found in step 1):
cv_params = {'max_depth': [3, 4, 5, 6, 7, 8, 9, 10], 'min_child_weight': [6, 7, 8]}
other_params = {'learning_rate': 0.1, 'n_estimators': 20, 'max_depth': 5,
                'min_child_weight': 1, 'seed': 0, 'subsample': 0.8,
                'colsample_bytree': 0.8, 'gamma': 0, 'reg_alpha': 0, 'reg_lambda': 1}
model = XGBRegressor(**other_params)
optimized_GBM = GridSearchCV(estimator=model, param_grid=cv_params,
                             scoring='r2', cv=3, verbose=1, n_jobs=-1)
optimized_GBM.fit(x_data, y_data)
evaluate_result = optimized_GBM.cv_results_
print('CV results per candidate: {0}'.format(evaluate_result))
print('Best parameter values: {0}'.format(optimized_GBM.best_params_))
print('Best model score: {0}'.format(optimized_GBM.best_score_))
3. Tune gamma:
# Tune gamma (max_depth and min_child_weight fixed at the values found above):
cv_params = {'gamma': [0.1, 0.2, 0.3, 0.4, 0.5, 0.6]}
other_params = {'learning_rate': 0.1, 'n_estimators': 20, 'max_depth': 4,
                'min_child_weight': 6, 'seed': 0, 'subsample': 0.8,
                'colsample_bytree': 0.8, 'gamma': 0, 'reg_alpha': 0, 'reg_lambda': 1}
model = XGBRegressor(**other_params)
optimized_GBM = GridSearchCV(estimator=model, param_grid=cv_params,
                             scoring='r2', cv=3, verbose=1, n_jobs=-1)
optimized_GBM.fit(x_data, y_data)
evaluate_result = optimized_GBM.cv_results_
print('CV results per candidate: {0}'.format(evaluate_result))
print('Best parameter values: {0}'.format(optimized_GBM.best_params_))
print('Best model score: {0}'.format(optimized_GBM.best_score_))
4. Tune subsample and colsample_bytree:
# Tune subsample and colsample_bytree (gamma fixed at the value found above):
cv_params = {'subsample': [0.6, 0.7, 0.8, 0.9], 'colsample_bytree': [0.6, 0.7, 0.8, 0.9]}
other_params = {'learning_rate': 0.1, 'n_estimators': 20, 'max_depth': 4,
                'min_child_weight': 6, 'seed': 0, 'subsample': 0.8,
                'colsample_bytree': 0.8, 'gamma': 0.2, 'reg_alpha': 0, 'reg_lambda': 1}
model = XGBRegressor(**other_params)
optimized_GBM = GridSearchCV(estimator=model, param_grid=cv_params,
                             scoring='r2', cv=3, verbose=1, n_jobs=4)
optimized_GBM.fit(x_data, y_data)
evaluate_result = optimized_GBM.cv_results_
print('CV results per candidate: {0}'.format(evaluate_result))
print('Best parameter values: {0}'.format(optimized_GBM.best_params_))
print('Best model score: {0}'.format(optimized_GBM.best_score_))
5. Tune reg_alpha and reg_lambda:
# Tune the regularization terms reg_alpha and reg_lambda:
cv_params = {'reg_alpha': [0.05, 0.1, 1, 2, 3], 'reg_lambda': [0.05, 0.1, 1, 2, 3]}
other_params = {'learning_rate': 0.1, 'n_estimators': 20, 'max_depth': 4,
                'min_child_weight': 6, 'seed': 0, 'subsample': 0.8,
                'colsample_bytree': 0.9, 'gamma': 0.2, 'reg_alpha': 0, 'reg_lambda': 1}
model = XGBRegressor(**other_params)
optimized_GBM = GridSearchCV(estimator=model, param_grid=cv_params,
                             scoring='r2', cv=3, verbose=1, n_jobs=4)
optimized_GBM.fit(x_data, y_data)
evaluate_result = optimized_GBM.cv_results_
print('CV results per candidate: {0}'.format(evaluate_result))
print('Best parameter values: {0}'.format(optimized_GBM.best_params_))
print('Best model score: {0}'.format(optimized_GBM.best_score_))
6. Tune learning_rate (typically lowered at this final stage):
# Tune learning_rate; at this point it is usually lowered and re-tested:
cv_params = {'learning_rate': [0.01, 0.05, 0.07, 0.1, 0.2]}
other_params = {'learning_rate': 0.1, 'n_estimators': 20, 'max_depth': 4,
                'min_child_weight': 6, 'seed': 0, 'subsample': 0.8,
                'colsample_bytree': 0.9, 'gamma': 0.2, 'reg_alpha': 0.1, 'reg_lambda': 1}
model = XGBRegressor(**other_params)
optimized_GBM = GridSearchCV(estimator=model, param_grid=cv_params,
                             scoring='r2', cv=3, verbose=1, n_jobs=4)
optimized_GBM.fit(x_data, y_data)
evaluate_result = optimized_GBM.cv_results_
print('CV results per candidate: {0}'.format(evaluate_result))
print('Best parameter values: {0}'.format(optimized_GBM.best_params_))
print('Best model score: {0}'.format(optimized_GBM.best_score_))
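Once every step has run, the usual last move is to refit a single model with all the tuned values. A sketch using illustrative values; on your own data, read the actual values out of each step's best_params_:

# Final model with tuned parameters (illustrative values only).
final_params = {'learning_rate': 0.1, 'n_estimators': 20, 'max_depth': 4,
                'min_child_weight': 6, 'seed': 0, 'subsample': 0.8,
                'colsample_bytree': 0.9, 'gamma': 0.2, 'reg_alpha': 0.1,
                'reg_lambda': 1}
final_model = XGBRegressor(**final_params)
final_model.fit(x_data, y_data)
# XGBRegressor follows the sklearn API, so score() reports R^2.
print('Training R^2: {0}'.format(final_model.score(x_data, y_data)))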
Source: https://www.cnblogs.com/wangzhenghua/p/11251462.html