曲线拟合Scipy与3d数据和参数

Dat*_*chy 2 python curve-fitting scipy

我正在研究在scipy中拟合3d分布函数.我有一个numpy数组,其中有x和y-bin的计数,我试图将其与一个相当复杂的三维分布函数相匹配.数据适合26(!)参数,这些参数描述了其两个成分群体的形状.

我在这里学到了当我调用leastsq时,我必须将我的x和y坐标作为'args'传递.unutbu提供的代码是为我编写的,但当我尝试将其应用于我的特定情况时,我收到错误"TypeError:leastsq()得到关键字参数'args'的多个值"

这是我的代码(抱歉长度):

import numpy as np
import matplotlib.pyplot as plt
import scipy.optimize as spopt
from textwrap import wrap
import collections

cl = 0.5
ch = 3.5
rl = -23.5
rh = -18.5
mbins = 10
cbins = 10

def hist_data(mixed_data, mbins, cbins):
    import numpy as np
    H, xedges, yedges = np.histogram2d(mixed_data[:,1], mixed_data[:,2], bins = (mbins, cbins), weights = mixed_data[:,3])
    x, y = 0.5 * (xedges[:-1] + xedges[1:]), 0.5 * (yedges[:-1] + yedges[1:])
    return H.T, x, y

def gauss(x, s, mu, a):
    import numpy as np
    return a * np.exp(-((x - mu)**2. / (2. * s**2.)))

def tanhlin(x, p0, p1, q0, q1, q2):
    import numpy as np
    return p0 + p1 * (x + 20.) + q0 * np.tanh((x - q1)/q2)

def func3d(p, x, y):
    import numpy as np
    from sys import exit
    rsp0, rsp1, rsq0, rsq1, rsq2, rmp0, rmp1, rmq0, rmq1, rmq2, rs, rm, ra, bsp0, bsp1, bsq0, bsq1, bsq2, bmp0, bmp1, bmq0, bmq1, bmq2, bs, bm, ba = p
x, y = np.meshgrid(coords[0], coords[1])
    rs = tanhlin(x, rsp0, rsp1, rsq0, rsq1, rsq2)
    rm = tanhlin(x, rmp0, rmp1, rmq0, rmq1, rmq2)
    ra = schechter(x, rap, raa, ram) # unused
    bs = tanhlin(x, bsp0, bsp1, bsq0, bsq1, bsq2)
    bm = tanhlin(x, bmp0, bmp1, bmq0, bmq1, bmq2)
    ba = schechter(x, bap, baa, bam) # unused
    red_dist = ra / (rs * np.sqrt(2 * np.pi)) * gauss(y, rs, rm, ra)
    blue_dist = ba / (bs * np.sqrt(2 * np.pi)) * gauss(y, bs, bm, ba)
    result = red_dist + blue_dist
return result

def residual(p, coords, data):
    import numpy as np
    model = func3d(p, coords)
    res = (model.flatten() - data.flatten())
    # can put parameter restrictions in here
    return res

def poiss_err(data):
    import numpy as np
    return np.where(np.sqrt(H) > 0., np.sqrt(H), 2.)

# =====

H, x, y = hist_data(mixed_data, mbins, cbins)

data = H

coords = x, y
# x and y will be the projected coordinates of the data H onto the plane z = 0

# x has bins of width 0.5, with centers at -23.25, -22.75, ... , -19.25, -18.75
# y has bins of width 0.3, with centers at 0.65, 0.95, ... , 3.05, 3.35    

Param = collections.namedtuple('Param', 'rsp0 rsp1 rsq0 rsq1 rsq2 rmp0 rmp1 rmq0 rmq1 rmq2 rs rm ra bsp0 bsp1 bsq0 bsq1 bsq2 bmp0 bmp1 bmq0 bmq1 bmq2 bs bm ba')
p_guess = Param(rsp0 = 0.152, rsp1 = 0.008, rsq0 = 0.044, rsq1 = -19.91, rsq2 = 0.94, rmp0 = 2.279, rmp1 = -0.037, rmq0 = -0.108, rmq1 = -19.81, rmq2 = 0.96, rs = 1., rm = -20.5, ra = 10000., bsp0 = 0.298, bsp1 = 0.014, bsq0 = -0.067, bsq1 = -19.90, bsq2 = 0.58, bmp0 = 1.790, bmp1 = -0.053, bmq0 = -0.363, bmq1 = -20.75, bmq2 = 1.12, bs = 1., bm = -20., ba = 2000.)

opt, cov, infodict, mesg, ier = spopt.leastsq(residual, p_guess, poiss_err(H), args = coords, maxfev = 100000, full_output = True)
Run Code Online (Sandbox Code Playgroud)

这是我的数据,只有更少的箱子:

[[  1.00000000e+01   1.10000000e+01   2.10000000e+01   1.90000000e+01
1.70000000e+01   2.10000000e+01   2.40000000e+01   1.90000000e+01
2.80000000e+01   1.90000000e+01]
[  1.40000000e+01   4.50000000e+01   6.00000000e+01   6.80000000e+01
1.34000000e+02   1.97000000e+02   2.23000000e+02   2.90000000e+02
3.23000000e+02   3.03000000e+02]
[  3.00000000e+01   1.17000000e+02   3.78000000e+02   9.74000000e+02
1.71900000e+03   2.27700000e+03   2.39000000e+03   2.25500000e+03
1.85600000e+03   1.31000000e+03]
[  1.52000000e+02   9.32000000e+02   2.89000000e+03   5.23800000e+03
6.66200000e+03   6.19100000e+03   4.54900000e+03   3.14600000e+03
2.09000000e+03   1.33800000e+03]
[  5.39000000e+02   2.58100000e+03   6.51300000e+03   8.89900000e+03
8.52900000e+03   6.22900000e+03   3.55000000e+03   2.14300000e+03
1.19000000e+03   6.92000000e+02]
[  1.49600000e+03   4.49200000e+03   8.77200000e+03   1.07610000e+04
9.76700000e+03   7.04900000e+03   4.23200000e+03   2.47200000e+03
1.41500000e+03   7.02000000e+02]
[  2.31800000e+03   7.01500000e+03   1.28870000e+04   1.50840000e+04
1.35590000e+04   8.55600000e+03   4.15600000e+03   1.77100000e+03
6.57000000e+02   2.55000000e+02]
[  1.57500000e+03   3.79300000e+03   5.20900000e+03   4.77800000e+03
3.26600000e+03   1.44700000e+03   5.31000000e+02   1.85000000e+02
9.30000000e+01   4.90000000e+01]
[  7.01000000e+02   1.21600000e+03   1.17600000e+03   7.93000000e+02
4.79000000e+02   2.02000000e+02   8.80000000e+01   3.90000000e+01
2.30000000e+01   1.90000000e+01]
[  2.93000000e+02   3.93000000e+02   2.90000000e+02   1.97000000e+02
1.18000000e+02   6.40000000e+01   4.10000000e+01   1.20000000e+01
1.10000000e+01   4.00000000e+00]]
Run Code Online (Sandbox Code Playgroud)

非常感谢!

Eiy*_*uyf 6

那么leastsq尝试:

"最小化一组方程的平方和" - scipy docs

因为它说它正在最小化一组函数,因此如果你看一下这里的参数,你实际上并没有以最简单的方式获取任何x或y数据输入,所以你可以按照自己的意愿去做并传递一个残余函数,但它是显着的更容易使用curve_fit哪个适合你:)并创建必要的方程式

对于拟合,您应该使用:curve_fit如果您对使用的通用残差没问题,那么res = leastsq(func, p0, args=args, full_output=1, **kw)如果您查看此处代码,这实际上是您传递的函数.

例如,如果我在2d中拟合rosenbrock函数并猜测y参数:

from scipy.optimize import curve_fit
from itertools import imap
import numpy as np
# use only an even number of arguments
def rosen2d(x,a):
    return (1-x)**2 + 100*(a - (x**2))**2
#generate some random data slightly off

datax = np.array([.01*x for x in range(-10,10)])
datay = 2.3
dataz = np.array(map(lambda x: rosen2d(x,datay), datax))
optimalparams, covmatrix = curve_fit(rosen2d, datax, dataz)
print 'opt:',optimalparams
Run Code Online (Sandbox Code Playgroud)

在4d中拟合colville函数:

from scipy.optimize import curve_fit
import numpy as np

# 4 dimensional colville function
# definition from http://www.sfu.ca/~ssurjano/colville.html
def colville(x,x3,x4):
    x1,x2 = x[:,0],x[:,1]
    return 100*(x1**2 - x2)**2 + (x1-1)**2 + (x3-1)**2 + \
            90*(x3**2 - x4)**2 + \
            10.1*((x2 - 1)**2 + (x4 - 1)**2) + \
            19.8*(x2 - 1)*(x4 - 1)
#generate some random data slightly off

datax = np.array([[x,x] for x in range(-10,10)])
#add gaussian noise
datax+= np.random.rand(*datax.shape)
#set 2 of the 4 parameters to constants
x3 = 3.5
x4 = 4.5
#calculate the function
dataz = colville(datax, x3, x4)
#fit the function
optimalparams, covmatrix = curve_fit(colville, datax, dataz)
print 'opt:',optimalparams
Run Code Online (Sandbox Code Playgroud)

使用自定义残差函数:

from scipy.optimize import leastsq
import numpy as np

# 4 dimensional colville function
# definition from http://www.sfu.ca/~ssurjano/colville.html
def colville(x,x3,x4):
    x1,x2 = x[:,0],x[:,1]
    return 100*(x1**2 - x2)**2 + (x1-1)**2 + (x3-1)**2 + \
            90*(x3**2 - x4)**2 + \
            10.1*((x2 - 1)**2 + (x4 - 1)**2) + \
            19.8*(x2 - 1)*(x4 - 1)
#generate some random data slightly off


datax = np.array([[x,x] for x in range(-10,10)])
#add gaussian noise
datax+= np.random.rand(*datax.shape)
#set 2 of the 4 parameters to constants
x3 = 3.5
x4 = 4.5

def residual(p, x, y):
    return y - colville(x,*p)
#calculate the function
dataz = colville(datax, x3, x4)
#guess some initial parameter values
p0 = [0,0]
#calculate a minimization of the residual
optimalparams = leastsq(residual, p0, args=(datax, dataz))[0]
print 'opt:',optimalparams
Run Code Online (Sandbox Code Playgroud)

编辑:您使用了位置和关键字arg args:如果您查看文档,您将看到它使用位置3,但也可以用作关键字参数.你使用两个意味着功能是预期的,困惑的.