
Hyperparameter tuning code for xgboost and LightGBM: the Optuna edition



This post gives the implementation code for tuning xgboost and lightGBM with Optuna. The search runs quite fast, a separate validation set can be specified, and once the parameter search ranges are given the tuning runs automatically.
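
The original post omits the imports; a minimal set that makes the snippets below runnable would look roughly like this (wash_data is the author's own preprocessing helper and is not shown):

import numpy as np
import pandas as pd
import lightgbm as lgb
import optuna
from optuna.samplers import TPESampler
from time import time
from xgboost import XGBClassifier
from sklearn.datasets import load_iris
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split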

XGBoost

The main things you need to change:

  1. Specify the search ranges in param
  2. Pass the sampled parameters into XGBClassifier with **param
  3. Set the number of trials with n_trials
# xgboost
trainrecords = pd.read_csv(r"./traindata/traindata_combine.csv",encoding="utf-8")
X_train,y_train = wash_data(trainrecords)

devrecords = pd.read_csv(r"./traindata/devdata_combine.csv",encoding="utf-8")
X_test, y_test = wash_data(devrecords)            # handle missing values, normalize column order

sampler = TPESampler(seed=10) # for reproducibility

# X_train = X_train.iloc[:100,]
# y_train = y_train.iloc[:100,]
def objective(trial): 
    param = {
        'objective': 'binary:logistic',
        'eval_metric': 'logloss',       # XGBoost's name for the metric parameter
        'verbosity':3,
        #'num_class':3, 
        'alpha': trial.suggest_loguniform('alpha', 1e-8, 10.0),
        'lambda': trial.suggest_loguniform('lambda', 1e-8, 10.0),
        #'num_leaves': trial.suggest_int('num_leaves', 2, 512),
        'learning_rate': 0.05,    # trial.suggest_loguniform('learning_rate', 0.01, 0.1),
        'n_estimators': 2000,     # trial.suggest_int('n_estimators', 1000, 3000),
        'colsample_bytree': trial.suggest_uniform('colsample_bytree', 0.5, 1.0),
        'subsample': 0.8,         # trial.suggest_uniform('subsample', 0.5, 1.0),
        #'bagging_freq': trial.suggest_int('bagging_freq', 1, 7),
        #'min_child_samples': trial.suggest_int('min_child_samples', 5, 100),
        'max_depth': 8,           # trial.suggest_int('max_depth', 5, 15),
    }
    xgb = XGBClassifier(**param)     # key step: unpack the sampled params into the model
    gbm = xgb.fit(X_train,y_train)
    return accuracy_score(y_test, np.round(gbm.predict(X_test)))

t0 = time()                             # start timing the search
study = optuna.create_study(direction='maximize', sampler=sampler)
study.optimize(objective, n_trials=10)  # number of trials to run
# Print the best parameters found and the elapsed time
print(f"Best params: {study.best_params}")
print(f"Elapsed: {time() - t0:.1f}s")

LightGBM

iris = load_iris()
X = iris.data
y = iris.target

X_train,X_test,y_train,y_test = train_test_split(X,y,test_size = 0.3,random_state = 14)

sampler = TPESampler(seed=10) # for reproducibility
def objective(trial):
    dtrain = lgb.Dataset(X_train, label=y_train)   # build LightGBM's Dataset for training
    
    param = {
        'objective': 'multiclass',      # iris has 3 classes, so 'binary' would fail
        'metric': 'multi_logloss',
        #'verbosity': -1,
        'boosting_type': 'gbdt',
        'num_class': 3,
        'lambda_l1': trial.suggest_loguniform('lambda_l1', 1e-8, 10.0),
        'lambda_l2': trial.suggest_loguniform('lambda_l2', 1e-8, 10.0),
        'num_leaves': trial.suggest_int('num_leaves', 2, 512),
        'learning_rate': trial.suggest_loguniform('learning_rate', 1e-8, 1.0),
        'n_estimators': trial.suggest_int('n_estimators', 700, 3000),
        'feature_fraction': trial.suggest_uniform('feature_fraction', 0.4, 1.0),
        'bagging_fraction': trial.suggest_uniform('bagging_fraction', 0.4, 1.0),
        'bagging_freq': trial.suggest_int('bagging_freq', 1, 7),
        'min_child_samples': trial.suggest_int('min_child_samples', 5, 100),
    }

    gbm = lgb.train(param, dtrain)
    # gbm.predict() returns class probabilities with shape (n_samples, n_classes)
    return accuracy_score(y_test, np.argmax(gbm.predict(X_test), axis=1))

t0 = time()                             # start timing the search
study = optuna.create_study(direction='maximize', sampler=sampler)
study.optimize(objective, n_trials=2)
# Print the best parameters found and the elapsed time
print(f"Best params: {study.best_params}")
print(f"Elapsed: {time() - t0:.1f}s")


# Prediction with the tuned parameters
dtrain = lgb.Dataset(X_train, label=y_train)
param = {
    'objective': 'multiclass',          # match the 3-class iris labels ('binary' with num_class=3 is inconsistent)
    'metric': 'multi_logloss',
    'verbosity': -1,
    'boosting_type': 'gbdt',
    'num_class': 3,
    'lambda_l1': 0.0234448167468032,
    'lambda_l2': 7.075730911992614e-07,
    'num_leaves': 173,
    'learning_rate': 4.887601625186522e-05,
    'n_estimators': 1824,
    'feature_fraction': 0.9712805361251421,
    'bagging_fraction': 0.8498709168727996,
    'bagging_freq': 2,
    'min_child_samples': 17,
}
gbm = lgb.train(param, dtrain)
gbm_pred = gbm.predict(X_test)          # class probabilities, shape (n_samples, 3)
print(accuracy_score(y_test, np.argmax(gbm_pred, axis=1)))
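
Rather than copying the tuned numbers by hand as above, you can also merge study.best_params into a base config; a small sketch under the same setup:

base_param = {
    'objective': 'multiclass',
    'metric': 'multi_logloss',
    'verbosity': -1,
    'boosting_type': 'gbdt',
    'num_class': 3,
}
param = {**base_param, **study.best_params}   # the searched keys simply fill in the rest
gbm = lgb.train(param, dtrain)
print(accuracy_score(y_test, np.argmax(gbm.predict(X_test), axis=1)))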

Source: https://blog.csdn.net/weixin_43899514/article/details/119206070