Gen*_* Xu 7 python machine-learning scikit-learn
我想使用嵌套 CV 方法从 SVC 中找到最佳参数:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline
from sklearn.datasets import load_breast_cancer
cancer = load_breast_cancer()
X, y = load_breast_cancer(return_X_y=True)
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.30, random_state=42)
from sklearn.model_selection import GridSearchCV, cross_val_score
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import Imputer, StandardScaler
from sklearn.decomposition import PCA
from sklearn.svm import SVC
pipe_svc = make_pipeline(Imputer(),StandardScaler(),PCA(n_components=2),SVC(random_state=1))
param_range = [0.001,0.01,0.1,1,10,100,1000]
param_grid = [{'svc__C': param_range, 'svc__kernel': ['linear']},
{'svc__C': param_range, 'svc__gamma': param_range,'svc__kernel': ['rbf']}]
gs = GridSearchCV(estimator=pipe_svc,param_grid=param_grid, scoring='accuracy',n_jobs=4, cv = 2)
scores = cross_val_score(gs, X_train, y_train, scoring='accuracy', cv=5)
scores
# how do I get the best parameters out from gridsearch after cross_val?
Out[]: array([0.925 , 0.9375 , 0.925 , 0.95 , 0.94871795])
gs.best_estimator_
Out[]: Pipeline(memory=None,
steps=[('imputer', Imputer(axis=0, copy=True, missing_values='NaN', strategy='mean', verbose=0)), ('standardscaler', StandardScaler(copy=True, with_mean=True, with_std=True)), ('pca', PCA(copy=True, iterated_power='auto', n_components=2, random_state=None,
svd_solver='auto', tol=0.0, whiten=False)...ar',
max_iter=-1, probability=False, random_state=1, shrinking=True,
tol=0.001, verbose=False))])
Run Code Online (Sandbox Code Playgroud)
最后一行代码只给出了 5 个准确度分数。gs.best_estimator_ 也没有提供任何有用的信息。在管道中结合 GridSearchCV 和 cross_val_score 的最佳方法是什么?
其实,您不必使用 cross_val_score:交叉验证期间以及找到最佳估计器之后的所有信息和元结果,都可以直接从拟合后的 GridSearchCV 对象中获取。
请考虑这个例子:
# Tune an SVC pipeline with one GridSearchCV and read every result (best
# estimator, best parameters, per-candidate scores) off the fitted search
# object instead of wrapping it in cross_val_score.
from sklearn.datasets import load_breast_cancer
from sklearn.decomposition import PCA
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import train_test_split, StratifiedKFold
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC

# Imputer was removed from sklearn.preprocessing in scikit-learn 0.22.
# Try the original import first so older installs behave exactly as before,
# and fall back to its replacement so the example still runs on current ones.
try:
    from sklearn.preprocessing import Imputer
except ImportError:
    from sklearn.impute import SimpleImputer as Imputer

X, y = load_breast_cancer(return_X_y=True)

# No random_state is passed, so the 70/30 split (and therefore every score
# printed below) changes from run to run.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3)

# Impute -> standardise -> project onto 2 principal components -> SVC.
pipe_svc = make_pipeline(Imputer(),StandardScaler(),PCA(n_components=2),SVC(random_state=1))

# A single dict means the full C x kernel x gamma product is searched.
# gamma has no effect on the linear kernel, so each linear candidate is
# refitted once per gamma value; a list of two dicts (linear without gamma,
# rbf with gamma) would avoid those redundant fits.
param_range = [0.001,0.01,0.1,1,10,100,1000]
param_grid = {'svc__C': param_range, 'svc__kernel': ['linear', 'rbf'],
              'svc__gamma': param_range}

cv = StratifiedKFold(n_splits=5)

# return_train_score=True is required for the 'mean_train_score' entries
# that are read from cv_results_ below.
gs = GridSearchCV(estimator=pipe_svc,param_grid=param_grid, scoring='accuracy', cv = cv,
                  return_train_score=True)
gs.fit(X_train, y_train)

print("Best Estimator: \n{}\n".format(gs.best_estimator_))
print("Best Parameters: \n{}\n".format(gs.best_params_))
# best_score_ is the mean cross-validated score of the best candidate.
print("Best Test Score: \n{}\n".format(gs.best_score_))
# best_index_ selects the winning candidate's row in cv_results_.
print("Best Training Score: \n{}\n".format(gs.cv_results_['mean_train_score'][gs.best_index_]))
print("All Training Scores: \n{}\n".format(gs.cv_results_['mean_train_score']))
print("All Test Scores: \n{}\n".format(gs.cv_results_['mean_test_score']))
# Uncomment to print every result recorded during the cross-validated search:
#print("All Meta Results During CV Search: \n{}\n".format(gs.cv_results_))
Run Code Online (Sandbox Code Playgroud)
输出
Best Estimator:
Pipeline(memory=None,
steps=[('imputer', Imputer(axis=0, copy=True,
missing_values='NaN', strategy='mean', verbose=0)),
('standardscaler', StandardScaler(copy=True, with_mean=True,
with_std=True)), ('pca', PCA(copy=True, iterated_power='auto',
n_components=2, random_state=None,
svd_solver='auto', tol=0.0, whiten=False)...ar',
max_iter=-1, probability=False, random_state=1, shrinking=True,
tol=0.001, verbose=False))])
Best Parameters:
{'svc__gamma': 0.001, 'svc__kernel': 'linear', 'svc__C': 1}
Best Test Score:
0.9422110552763819
Best Training Score:
0.9440783896216558
All Training Scores:
[0.90012027 0.64070503 0.90012027 0.64070503 0.90012027 0.64070503
0.90012027 0.64070503 0.90012027 0.64070503 0.90012027 0.64070503
0.90012027 0.64070503 0.92587291 0.64070503 0.92587291 0.64070503
0.92587291 0.64070503 0.92587291 0.64070503 0.92587291 0.64070503
0.92587291 0.64070503 0.92587291 0.64070503 0.93779697 0.68906962
0.93779697 0.91582382 0.93779697 0.92901362 0.93779697 0.88879951
0.93779697 0.64070503 0.93779697 0.64070503 0.93779697 0.64070503
0.94407839 0.91394491 0.94407839 0.93277932 0.94407839 0.93968376
0.94407839 0.95413931 0.94407839 0.98052483 0.94407839 0.9949725
0.94407839 0.99937304 0.94533822 0.93090042 0.94533822 0.94345143
0.94533822 0.94911575 0.94533822 0.96293448 0.94533822 0.99434357
0.94533822 1. 0.94533822 1. 0.94533822 0.94219554
0.94533822 0.94219357 0.94533822 0.95099466 0.94533822 0.98052286
0.94533822 1. 0.94533822 1. 0.94533822 1.
0.94596518 0.9428225 0.94596518 0.94345537 0.94596518 0.95539323
0.94596518 0.99371858 0.94596518 1. 0.94596518 1.
0.94596518 1. ]
All Test Scores:
[0.88944724 0.64070352 0.88944724 0.64070352 0.88944724 0.64070352
0.88944724 0.64070352 0.88944724 0.64070352 0.88944724 0.64070352
0.88944724 0.64070352 0.92713568 0.64070352 0.92713568 0.64070352
0.92713568 0.64070352 0.92713568 0.64070352 0.92713568 0.64070352
0.92713568 0.64070352 0.92713568 0.64070352 0.9321608 0.68090452
0.9321608 0.90954774 0.9321608 0.92211055 0.9321608 0.84422111
0.9321608 0.64070352 0.9321608 0.64070352 0.9321608 0.64070352
0.94221106 0.9120603 0.94221106 0.92713568 0.94221106 0.91959799
0.94221106 0.93969849 0.94221106 0.81407035 0.94221106 0.65075377
0.94221106 0.64572864 0.94221106 0.92964824 0.94221106 0.92964824
0.94221106 0.92462312 0.94221106 0.92211055 0.94221106 0.80653266
0.94221106 0.65326633 0.94221106 0.64572864 0.94221106 0.92964824
0.94221106 0.93969849 0.94221106 0.92713568 0.94221106 0.90954774
0.94221106 0.82663317 0.94221106 0.65326633 0.94221106 0.64572864
0.93969849 0.94221106 0.93969849 0.93467337 0.93969849 0.92964824
0.93969849 0.87939698 0.93969849 0.8241206 0.93969849 0.65326633
0.93969849 0.64572864]
Run Code Online (Sandbox Code Playgroud)
| 归档时间: |
|
| 查看次数: |
8228 次 |
| 最近记录: |