我正在对数据集中的某些特征使用目标编码(target encoding)。我的完整流水线(pipeline)如下:
from category_encoders.target_encoder import TargetEncoder
from sklearn.compose import ColumnTransformer
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import train_test_split
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import OneHotEncoder
from sklearn.preprocessing import StandardScaler
# Numeric branch: 'feature_1' is passed through StandardScaler.
numeric_features = ['feature_1']
numeric_pipeline = Pipeline(steps=[('scaler', StandardScaler())])

# One-hot branch: these categorical columns are expanded by OneHotEncoder.
ohe_features = ['feature_2', 'feature_3', 'feature_4']
ohe_pipeline = Pipeline(steps=[('ohe', OneHotEncoder())])

# Target-encoding branch: the encoder is used directly rather than wrapped in
# a single-step Pipeline like the other two branches.
te_features = ['feature_5', 'feature_6']
te_pipeline = TargetEncoder()

# Route each column group through its own transformer. Columns not named in
# any group are handled by ColumnTransformer's default `remainder` setting.
preprocessor = ColumnTransformer(
    transformers=[
        ('numeric', numeric_pipeline, numeric_features),
        ('ohe_features', ohe_pipeline, ohe_features),
        ('te_features', te_pipeline, te_features),
    ]
)

# End-to-end model: preprocessing followed by a logistic-regression classifier.
# The step names ('preprocessor', 'classifier', and the nested names above)
# are the prefixes used when addressing hyper-parameters, e.g. in a
# GridSearchCV param grid ('classifier__C'), so they are kept unchanged.
clf_lr = Pipeline(
    steps=[
        ('preprocessor', preprocessor),
        ('classifier', LogisticRegression()),
    ]
)
X_train, X_test, y_train, y_test = …