tch*_*rty 6 python numpy machine-learning scipy
我正在尝试使用SciPy fmin_bfgs函数在Python中编写逻辑回归,但遇到了一些问题.我编写了逻辑(sigmoid)转换函数和成本函数的函数,并且这些函数工作正常(我使用了通过罐装软件找到的参数向量的优化值来测试函数,并且这些函数匹配).我不太确定我的渐变函数的实现,但它看起来很合理.
这是代码:
# purpose: logistic regression
import numpy as np
import scipy.optimize
# prepare the data
data = np.loadtxt('data.csv', delimiter=',', skiprows=1)
vY = data[:, 0]
mX = data[:, 1:]
intercept = np.ones(mX.shape[0]).reshape(mX.shape[0], 1)
mX = np.concatenate((intercept, mX), axis = 1)
iK = mX.shape[1]
iN = mX.shape[0]
# logistic transformation
def logit(mX, vBeta):
return((1/(1.0 + np.exp(-np.dot(mX, vBeta)))))
# test function call
vBeta0 = np.array([-.10296645, -.0332327, -.01209484, .44626211, .92554137, .53973828,
1.7993371, .7148045 ])
logit(mX, vBeta0)
# cost function
def logLikelihoodLogit(vBeta, mX, vY):
return(-(np.sum(vY*np.log(logit(mX, vBeta)) + (1-vY)*(np.log(1-logit(mX, vBeta))))))
logLikelihoodLogit(vBeta0, mX, vY) # test function call
# gradient function
def likelihoodScore(vBeta, mX, vY):
return(np.dot(mX.T,
((np.dot(mX, vBeta) - vY)/
np.dot(mX, vBeta)).reshape(iN, 1)).reshape(iK, 1))
likelihoodScore(vBeta0, mX, vY).shape # test function call
# optimize the function (without gradient)
optimLogit = scipy.optimize.fmin_bfgs(logLikelihoodLogit,
x0 = np.array([-.1, -.03, -.01, .44, .92, .53,
1.8, .71]),
args = (mX, vY), gtol = 1e-3)
# optimize the function (with gradient)
optimLogit = scipy.optimize.fmin_bfgs(logLikelihoodLogit,
x0 = np.array([-.1, -.03, -.01, .44, .92, .53,
1.8, .71]), fprime = likelihoodScore,
args = (mX, vY), gtol = 1e-3)
Run Code Online (Sandbox Code Playgroud)
第一个优化(没有渐变)以关于除零的大量内容结束.
第二个优化(使用渐变)以矩阵未对齐错误结束,这可能意味着我已经得到了错误返回梯度的方式.
对此有任何帮助表示赞赏.如果有人想尝试这个,数据包含在下面.
low,age,lwt,race,smoke,ptl,ht,ui
0,19,182,2,0,0,0,1
0,33,155,3,0,0,0,0
0,20,105,1,1,0,0,0
0,21,108,1,1,0,0,1
0,18,107,1,1,0,0,1
0,21,124,3,0,0,0,0
0,22,118,1,0,0,0,0
0,17,103,3,0,0,0,0
0,29,123,1,1,0,0,0
0,26,113,1,1,0,0,0
0,19,95,3,0,0,0,0
0,19,150,3,0,0,0,0
0,22,95,3,0,0,1,0
0,30,107,3,0,1,0,1
0,18,100,1,1,0,0,0
0,18,100,1,1,0,0,0
0,15,98,2,0,0,0,0
0,25,118,1,1,0,0,0
0,20,120,3,0,0,0,1
0,28,120,1,1,0,0,0
0,32,121,3,0,0,0,0
0,31,100,1,0,0,0,1
0,36,202,1,0,0,0,0
0,28,120,3,0,0,0,0
0,25,120,3,0,0,0,1
0,28,167,1,0,0,0,0
0,17,122,1,1,0,0,0
0,29,150,1,0,0,0,0
0,26,168,2,1,0,0,0
0,17,113,2,0,0,0,0
0,17,113,2,0,0,0,0
0,24,90,1,1,1,0,0
0,35,121,2,1,1,0,0
0,25,155,1,0,0,0,0
0,25,125,2,0,0,0,0
0,29,140,1,1,0,0,0
0,19,138,1,1,0,0,0
0,27,124,1,1,0,0,0
0,31,215,1,1,0,0,0
0,33,109,1,1,0,0,0
0,21,185,2,1,0,0,0
0,19,189,1,0,0,0,0
0,23,130,2,0,0,0,0
0,21,160,1,0,0,0,0
0,18,90,1,1,0,0,1
0,18,90,1,1,0,0,1
0,32,132,1,0,0,0,0
0,19,132,3,0,0,0,0
0,24,115,1,0,0,0,0
0,22,85,3,1,0,0,0
0,22,120,1,0,0,1,0
0,23,128,3,0,0,0,0
0,22,130,1,1,0,0,0
0,30,95,1,1,0,0,0
0,19,115,3,0,0,0,0
0,16,110,3,0,0,0,0
0,21,110,3,1,0,0,1
0,30,153,3,0,0,0,0
0,20,103,3,0,0,0,0
0,17,119,3,0,0,0,0
0,17,119,3,0,0,0,0
0,23,119,3,0,0,0,0
0,24,110,3,0,0,0,0
0,28,140,1,0,0,0,0
0,26,133,3,1,2,0,0
0,20,169,3,0,1,0,1
0,24,115,3,0,0,0,0
0,28,250,3,1,0,0,0
0,20,141,1,0,2,0,1
0,22,158,2,0,1,0,0
0,22,112,1,1,2,0,0
0,31,150,3,1,0,0,0
0,23,115,3,1,0,0,0
0,16,112,2,0,0,0,0
0,16,135,1,1,0,0,0
0,18,229,2,0,0,0,0
0,25,140,1,0,0,0,0
0,32,134,1,1,1,0,0
0,20,121,2,1,0,0,0
0,23,190,1,0,0,0,0
0,22,131,1,0,0,0,0
0,32,170,1,0,0,0,0
0,30,110,3,0,0,0,0
0,20,127,3,0,0,0,0
0,23,123,3,0,0,0,0
0,17,120,3,1,0,0,0
0,19,105,3,0,0,0,0
0,23,130,1,0,0,0,0
0,36,175,1,0,0,0,0
0,22,125,1,0,0,0,0
0,24,133,1,0,0,0,0
0,21,134,3,0,0,0,0
0,19,235,1,1,0,1,0
0,25,95,1,1,3,0,1
0,16,135,1,1,0,0,0
0,29,135,1,0,0,0,0
0,29,154,1,0,0,0,0
0,19,147,1,1,0,0,0
0,19,147,1,1,0,0,0
0,30,137,1,0,0,0,0
0,24,110,1,0,0,0,0
0,19,184,1,1,0,1,0
0,24,110,3,0,1,0,0
0,23,110,1,0,0,0,0
0,20,120,3,0,0,0,0
0,25,241,2,0,0,1,0
0,30,112,1,0,0,0,0
0,22,169,1,0,0,0,0
0,18,120,1,1,0,0,0
0,16,170,2,0,0,0,0
0,32,186,1,0,0,0,0
0,18,120,3,0,0,0,0
0,29,130,1,1,0,0,0
0,33,117,1,0,0,0,1
0,20,170,1,1,0,0,0
0,28,134,3,0,0,0,0
0,14,135,1,0,0,0,0
0,28,130,3,0,0,0,0
0,25,120,1,0,0,0,0
0,16,95,3,0,0,0,0
0,20,158,1,0,0,0,0
0,26,160,3,0,0,0,0
0,21,115,1,0,0,0,0
0,22,129,1,0,0,0,0
0,25,130,1,0,0,0,0
0,31,120,1,0,0,0,0
0,35,170,1,0,1,0,0
0,19,120,1,1,0,0,0
0,24,116,1,0,0,0,0
0,45,123,1,0,0,0,0
1,28,120,3,1,1,0,1
1,29,130,1,0,0,0,1
1,34,187,2,1,0,1,0
1,25,105,3,0,1,1,0
1,25,85,3,0,0,0,1
1,27,150,3,0,0,0,0
1,23,97,3,0,0,0,1
1,24,128,2,0,1,0,0
1,24,132,3,0,0,1,0
1,21,165,1,1,0,1,0
1,32,105,1,1,0,0,0
1,19,91,1,1,2,0,1
1,25,115,3,0,0,0,0
1,16,130,3,0,0,0,0
1,25,92,1,1,0,0,0
1,20,150,1,1,0,0,0
1,21,200,2,0,0,0,1
1,24,155,1,1,1,0,0
1,21,103,3,0,0,0,0
1,20,125,3,0,0,0,1
1,25,89,3,0,2,0,0
1,19,102,1,0,0,0,0
1,19,112,1,1,0,0,1
1,26,117,1,1,1,0,0
1,24,138,1,0,0,0,0
1,17,130,3,1,1,0,1
1,20,120,2,1,0,0,0
1,22,130,1,1,1,0,1
1,27,130,2,0,0,0,1
1,20,80,3,1,0,0,1
1,17,110,1,1,0,0,0
1,25,105,3,0,1,0,0
1,20,109,3,0,0,0,0
1,18,148,3,0,0,0,0
1,18,110,2,1,1,0,0
1,20,121,1,1,1,0,1
1,21,100,3,0,1,0,0
1,26,96,3,0,0,0,0
1,31,102,1,1,1,0,0
1,15,110,1,0,0,0,0
1,23,187,2,1,0,0,0
1,20,122,2,1,0,0,0
1,24,105,2,1,0,0,0
1,15,115,3,0,0,0,1
1,23,120,3,0,0,0,0
1,30,142,1,1,1,0,0
1,22,130,1,1,0,0,0
1,17,120,1,1,0,0,0
1,23,110,1,1,1,0,0
1,17,120,2,0,0,0,0
1,26,154,3,0,1,1,0
1,20,106,3,0,0,0,0
1,26,190,1,1,0,0,0
1,14,101,3,1,1,0,0
1,28,95,1,1,0,0,0
1,14,100,3,0,0,0,0
1,23,94,3,1,0,0,0
1,17,142,2,0,0,1,0
1,21,130,1,1,0,1,0
Run Code Online (Sandbox Code Playgroud)
您的问题是您试图最小化的函数logLikelihoodLogit, 将返回NaN与您的初始估计非常接近的值。并且它也会尝试评估负对数并遇到其他问题。fmin_bfgs不知道这一点,将尝试评估此类值的函数并遇到麻烦。
我建议改用有界优化。您可以optimize.fmin_l_bfgs_b为此使用 scipy 。它使用与 类似的算法fmin_bfgs,但它支持参数空间中的边界。您可以类似地调用它,只需添加一个 bounds 关键字。这是一个关于如何调用的简单示例fmin_l_bfgs_b:
from scipy.optimize import fmin_bfgs, fmin_l_bfgs_b
# list of bounds: each item is a tuple with the (lower, upper) bounds
bd = [(0, 1.), ...]
test = fmin_l_bfgs_b(logLikelihoodLogit, x0=x0, args=(mX, vY), bounds=bd,
approx_grad=True)
Run Code Online (Sandbox Code Playgroud)
在这里,我使用的是近似梯度(似乎可以很好地处理您的数据),但是您可以fprime像示例中一样通过(我没有时间检查其正确性)。您会比我更了解您的参数空间,只需确保为您的参数可以采用的所有有意义的值构建边界数组。
这是我发回 SciPy 列表的答案,该问题被交叉发布到该列表中。感谢@tiago 的回答。基本上,我重新参数化了似然函数。另外,添加了对 check_grad 函数的调用。
#=====================================================
# purpose: logistic regression
import numpy as np
import scipy as sp
import scipy.optimize
import matplotlib as mpl
import os
# prepare the data
data = np.loadtxt('data.csv', delimiter=',', skiprows=1)
vY = data[:, 0]
mX = data[:, 1:]
# mX = (mX - np.mean(mX))/np.std(mX) # standardize the data; if required
intercept = np.ones(mX.shape[0]).reshape(mX.shape[0], 1)
mX = np.concatenate((intercept, mX), axis = 1)
iK = mX.shape[1]
iN = mX.shape[0]
# logistic transformation
def logit(mX, vBeta):
return((np.exp(np.dot(mX, vBeta))/(1.0 + np.exp(np.dot(mX, vBeta)))))
# test function call
vBeta0 = np.array([-.10296645, -.0332327, -.01209484, .44626211, .92554137, .53973828,
1.7993371, .7148045 ])
logit(mX, vBeta0)
# cost function
def logLikelihoodLogit(vBeta, mX, vY):
return(-(np.sum(vY*np.log(logit(mX, vBeta)) + (1-vY)*(np.log(1-logit(mX, vBeta))))))
logLikelihoodLogit(vBeta0, mX, vY) # test function call
# different parametrization of the cost function
def logLikelihoodLogitVerbose(vBeta, mX, vY):
return(-(np.sum(vY*(np.dot(mX, vBeta) - np.log((1.0 + np.exp(np.dot(mX, vBeta))))) +
(1-vY)*(-np.log((1.0 + np.exp(np.dot(mX, vBeta))))))))
logLikelihoodLogitVerbose(vBeta0, mX, vY) # test function call
# gradient function
def likelihoodScore(vBeta, mX, vY):
return(np.dot(mX.T,
(logit(mX, vBeta) - vY)))
likelihoodScore(vBeta0, mX, vY).shape # test function call
sp.optimize.check_grad(logLikelihoodLogitVerbose, likelihoodScore,
vBeta0, mX, vY) # check that the analytical gradient is close to
# numerical gradient
# optimize the function (without gradient)
optimLogit = scipy.optimize.fmin_bfgs(logLikelihoodLogitVerbose,
x0 = np.array([-.1, -.03, -.01, .44, .92, .53,
1.8, .71]),
args = (mX, vY), gtol = 1e-3)
# optimize the function (with gradient)
optimLogit = scipy.optimize.fmin_bfgs(logLikelihoodLogitVerbose,
x0 = np.array([-.1, -.03, -.01, .44, .92, .53,
1.8, .71]), fprime = likelihoodScore,
args = (mX, vY), gtol = 1e-3)
#=====================================================
Run Code Online (Sandbox Code Playgroud)