我正在使用scikit,正在尝试调整XGBoost。我尝试使用嵌套的交叉验证,通过管道对训练折叠进行重新缩放(以避免数据泄漏和过度拟合),并与GridSearchCV并行进行参数调整,并与cross_val_score并行获得roc_auc得分。
from imblearn.pipeline import Pipeline
from sklearn.model_selection import RepeatedKFold
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import cross_val_score
from xgboost import XGBClassifier
std_scaling = StandardScaler()
algo = XGBClassifier()
steps = [('std_scaling', StandardScaler()), ('algo', XGBClassifier())]
pipeline = Pipeline(steps)
parameters = {'algo__min_child_weight': [1, 2],
'algo__subsample': [0.6, 0.9],
'algo__max_depth': [4, 6],
'algo__gamma': [0.1, 0.2],
'algo__learning_rate': [0.05, 0.5, 0.3]}
cv1 = RepeatedKFold(n_splits=2, n_repeats = 5, random_state = 15)
clf_auc = GridSearchCV(pipeline, cv = cv1, param_grid = parameters, scoring = 'roc_auc', n_jobs=-1, return_train_score=False)
cv1 = RepeatedKFold(n_splits=2, …Run Code Online (Sandbox Code Playgroud)