我在继续研究管道(Pipeline)。我的目标是仅使用管道来执行机器学习的每一个步骤。这样会更灵活,也更容易把我的管道调整后用于其他用例。以下是我所做的:
这是我的代码:
import pandas as pd
from sklearn.base import BaseEstimator, TransformerMixin
from sklearn.feature_selection import SelectKBest
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.pipeline import Pipeline
from sklearn.metrics import roc_curve, auc
import matplotlib.pyplot as plt
from sklearn.metrics import confusion_matrix
from sklearn.metrics import f1_score
class FillNa(BaseEstimator, TransformerMixin):
def transform(self, x, y=None):
non_numerics_columns = x.columns.difference(
x._get_numeric_data().columns)
for column in x.columns:
if column in non_numerics_columns:
x.loc[:, column] = x.loc[:, …Run Code Online (Sandbox Code Playgroud)