Dat*_*chy 2 python curve-fitting scipy
我正在研究在scipy中拟合3d分布函数.我有一个numpy数组,其中有x和y-bin的计数,我试图将其与一个相当复杂的三维分布函数相匹配.数据适合26(!)参数,这些参数描述了其两个成分群体的形状.
我在这里学到了当我调用leastsq时,我必须将我的x和y坐标作为'args'传递.unutbu提供的代码是为我编写的,但当我尝试将其应用于我的特定情况时,我收到错误"TypeError:leastsq()得到关键字参数'args'的多个值"
这是我的代码(抱歉长度):
import numpy as np
import matplotlib.pyplot as plt
import scipy.optimize as spopt
from textwrap import wrap
import collections
cl = 0.5
ch = 3.5
rl = -23.5
rh = -18.5
mbins = 10
cbins = 10
def hist_data(mixed_data, mbins, cbins):
import numpy as np
H, xedges, yedges = np.histogram2d(mixed_data[:,1], mixed_data[:,2], bins = (mbins, cbins), weights = mixed_data[:,3])
x, y = 0.5 * (xedges[:-1] + xedges[1:]), 0.5 * (yedges[:-1] + yedges[1:])
return H.T, x, y
def gauss(x, s, mu, a):
import numpy as np
return a * np.exp(-((x - mu)**2. / (2. * s**2.)))
def tanhlin(x, p0, p1, q0, q1, q2):
import numpy as np
return p0 + p1 * (x + 20.) + q0 * np.tanh((x - q1)/q2)
def func3d(p, x, y):
import numpy as np
from sys import exit
rsp0, rsp1, rsq0, rsq1, rsq2, rmp0, rmp1, rmq0, rmq1, rmq2, rs, rm, ra, bsp0, bsp1, bsq0, bsq1, bsq2, bmp0, bmp1, bmq0, bmq1, bmq2, bs, bm, ba = p
x, y = np.meshgrid(coords[0], coords[1])
rs = tanhlin(x, rsp0, rsp1, rsq0, rsq1, rsq2)
rm = tanhlin(x, rmp0, rmp1, rmq0, rmq1, rmq2)
ra = schechter(x, rap, raa, ram) # unused
bs = tanhlin(x, bsp0, bsp1, bsq0, bsq1, bsq2)
bm = tanhlin(x, bmp0, bmp1, bmq0, bmq1, bmq2)
ba = schechter(x, bap, baa, bam) # unused
red_dist = ra / (rs * np.sqrt(2 * np.pi)) * gauss(y, rs, rm, ra)
blue_dist = ba / (bs * np.sqrt(2 * np.pi)) * gauss(y, bs, bm, ba)
result = red_dist + blue_dist
return result
def residual(p, coords, data):
import numpy as np
model = func3d(p, coords)
res = (model.flatten() - data.flatten())
# can put parameter restrictions in here
return res
def poiss_err(data):
import numpy as np
return np.where(np.sqrt(H) > 0., np.sqrt(H), 2.)
# =====
H, x, y = hist_data(mixed_data, mbins, cbins)
data = H
coords = x, y
# x and y will be the projected coordinates of the data H onto the plane z = 0
# x has bins of width 0.5, with centers at -23.25, -22.75, ... , -19.25, -18.75
# y has bins of width 0.3, with centers at 0.65, 0.95, ... , 3.05, 3.35
Param = collections.namedtuple('Param', 'rsp0 rsp1 rsq0 rsq1 rsq2 rmp0 rmp1 rmq0 rmq1 rmq2 rs rm ra bsp0 bsp1 bsq0 bsq1 bsq2 bmp0 bmp1 bmq0 bmq1 bmq2 bs bm ba')
p_guess = Param(rsp0 = 0.152, rsp1 = 0.008, rsq0 = 0.044, rsq1 = -19.91, rsq2 = 0.94, rmp0 = 2.279, rmp1 = -0.037, rmq0 = -0.108, rmq1 = -19.81, rmq2 = 0.96, rs = 1., rm = -20.5, ra = 10000., bsp0 = 0.298, bsp1 = 0.014, bsq0 = -0.067, bsq1 = -19.90, bsq2 = 0.58, bmp0 = 1.790, bmp1 = -0.053, bmq0 = -0.363, bmq1 = -20.75, bmq2 = 1.12, bs = 1., bm = -20., ba = 2000.)
opt, cov, infodict, mesg, ier = spopt.leastsq(residual, p_guess, poiss_err(H), args = coords, maxfev = 100000, full_output = True)
Run Code Online (Sandbox Code Playgroud)
这是我的数据,只有更少的箱子:
[[ 1.00000000e+01 1.10000000e+01 2.10000000e+01 1.90000000e+01
1.70000000e+01 2.10000000e+01 2.40000000e+01 1.90000000e+01
2.80000000e+01 1.90000000e+01]
[ 1.40000000e+01 4.50000000e+01 6.00000000e+01 6.80000000e+01
1.34000000e+02 1.97000000e+02 2.23000000e+02 2.90000000e+02
3.23000000e+02 3.03000000e+02]
[ 3.00000000e+01 1.17000000e+02 3.78000000e+02 9.74000000e+02
1.71900000e+03 2.27700000e+03 2.39000000e+03 2.25500000e+03
1.85600000e+03 1.31000000e+03]
[ 1.52000000e+02 9.32000000e+02 2.89000000e+03 5.23800000e+03
6.66200000e+03 6.19100000e+03 4.54900000e+03 3.14600000e+03
2.09000000e+03 1.33800000e+03]
[ 5.39000000e+02 2.58100000e+03 6.51300000e+03 8.89900000e+03
8.52900000e+03 6.22900000e+03 3.55000000e+03 2.14300000e+03
1.19000000e+03 6.92000000e+02]
[ 1.49600000e+03 4.49200000e+03 8.77200000e+03 1.07610000e+04
9.76700000e+03 7.04900000e+03 4.23200000e+03 2.47200000e+03
1.41500000e+03 7.02000000e+02]
[ 2.31800000e+03 7.01500000e+03 1.28870000e+04 1.50840000e+04
1.35590000e+04 8.55600000e+03 4.15600000e+03 1.77100000e+03
6.57000000e+02 2.55000000e+02]
[ 1.57500000e+03 3.79300000e+03 5.20900000e+03 4.77800000e+03
3.26600000e+03 1.44700000e+03 5.31000000e+02 1.85000000e+02
9.30000000e+01 4.90000000e+01]
[ 7.01000000e+02 1.21600000e+03 1.17600000e+03 7.93000000e+02
4.79000000e+02 2.02000000e+02 8.80000000e+01 3.90000000e+01
2.30000000e+01 1.90000000e+01]
[ 2.93000000e+02 3.93000000e+02 2.90000000e+02 1.97000000e+02
1.18000000e+02 6.40000000e+01 4.10000000e+01 1.20000000e+01
1.10000000e+01 4.00000000e+00]]
Run Code Online (Sandbox Code Playgroud)
非常感谢!
那么leastsq尝试:
"最小化一组方程的平方和" - scipy docs
因为它说它正在最小化一组函数,因此如果你看一下这里的参数,你实际上并没有以最简单的方式获取任何x或y数据输入,所以你可以按照自己的意愿去做并传递一个残余函数,但它是显着的更容易使用curve_fit哪个适合你:)并创建必要的方程式
对于拟合,您应该使用:curve_fit如果您对使用的通用残差没问题,那么res = leastsq(func, p0, args=args, full_output=1, **kw)如果您查看此处的代码,这实际上是您传递的函数.
例如,如果我在2d中拟合rosenbrock函数并猜测y参数:
from scipy.optimize import curve_fit
from itertools import imap
import numpy as np
# use only an even number of arguments
def rosen2d(x,a):
return (1-x)**2 + 100*(a - (x**2))**2
#generate some random data slightly off
datax = np.array([.01*x for x in range(-10,10)])
datay = 2.3
dataz = np.array(map(lambda x: rosen2d(x,datay), datax))
optimalparams, covmatrix = curve_fit(rosen2d, datax, dataz)
print 'opt:',optimalparams
Run Code Online (Sandbox Code Playgroud)
在4d中拟合colville函数:
from scipy.optimize import curve_fit
import numpy as np
# 4 dimensional colville function
# definition from http://www.sfu.ca/~ssurjano/colville.html
def colville(x,x3,x4):
x1,x2 = x[:,0],x[:,1]
return 100*(x1**2 - x2)**2 + (x1-1)**2 + (x3-1)**2 + \
90*(x3**2 - x4)**2 + \
10.1*((x2 - 1)**2 + (x4 - 1)**2) + \
19.8*(x2 - 1)*(x4 - 1)
#generate some random data slightly off
datax = np.array([[x,x] for x in range(-10,10)])
#add gaussian noise
datax+= np.random.rand(*datax.shape)
#set 2 of the 4 parameters to constants
x3 = 3.5
x4 = 4.5
#calculate the function
dataz = colville(datax, x3, x4)
#fit the function
optimalparams, covmatrix = curve_fit(colville, datax, dataz)
print 'opt:',optimalparams
Run Code Online (Sandbox Code Playgroud)
使用自定义残差函数:
from scipy.optimize import leastsq
import numpy as np
# 4 dimensional colville function
# definition from http://www.sfu.ca/~ssurjano/colville.html
def colville(x,x3,x4):
x1,x2 = x[:,0],x[:,1]
return 100*(x1**2 - x2)**2 + (x1-1)**2 + (x3-1)**2 + \
90*(x3**2 - x4)**2 + \
10.1*((x2 - 1)**2 + (x4 - 1)**2) + \
19.8*(x2 - 1)*(x4 - 1)
#generate some random data slightly off
datax = np.array([[x,x] for x in range(-10,10)])
#add gaussian noise
datax+= np.random.rand(*datax.shape)
#set 2 of the 4 parameters to constants
x3 = 3.5
x4 = 4.5
def residual(p, x, y):
return y - colville(x,*p)
#calculate the function
dataz = colville(datax, x3, x4)
#guess some initial parameter values
p0 = [0,0]
#calculate a minimization of the residual
optimalparams = leastsq(residual, p0, args=(datax, dataz))[0]
print 'opt:',optimalparams
Run Code Online (Sandbox Code Playgroud)
编辑:您使用了位置和关键字arg args:如果您查看文档,您将看到它使用位置3,但也可以用作关键字参数.你使用了这两个意味着功能是预期的,困惑的.
| 归档时间: |
|
| 查看次数: |
7144 次 |
| 最近记录: |