我想确保我的机器学习的操作顺序是正确的:
from sklearn import datasets
from sklearn.ensemble import RandomForestClassifier
from sklearn.feature_selection import SelectFromModel
from sklearn.grid_search import GridSearchCV
# 1. Initialize model
model = RandomForestClassifier(5000)
# 2. Load dataset
iris = datasets.load_iris()
X, y = iris.data, iris.target
# 3. Remove unimportant features
model = SelectFromModel(model, threshold=0.5).estimator
# 4. cross validate model on the important features
k_fold = KFold(n=len(data), n_folds=10, shuffle=True)
for k, (train, test) in enumerate(k_fold):
self.model.fit(data[train], target[train])
# 5. grid search for best parameters
param_grid = {
'n_estimators': [1000, 2500, …Run Code Online (Sandbox Code Playgroud)