Mey*_*emi 5 python porting pymc pymc3 probabilistic-programming
在这里,我的目的是估算由下式给出的阻尼谐波振荡器的参数(伽马和ω)
dX ^ 2 / dt ^ 2 +γ* dX / dt +(2 * pi * omega)^ 2 * X = 0。(我们可以向系统中添加高斯白噪声。)
import pymc
import numpy as np
import scipy.io as sio
import matplotlib.pyplot as plt;
from scipy.integrate import odeint
#import data
xdata = sio.loadmat('T.mat')['T'][0] #time
ydata1 = sio.loadmat('V1.mat')['V1'][0] # V2=dV1/dt, (X=V1),
ydata2 = sio.loadmat('V2.mat')['V2'][0] # dV2/dt=-(2pi*omega)^2*V1-gama*V2
#time span for solving the equations
npts= 500
dt=0.01
Tspan=5.0
time = np.linspace(0,Tspan,npts+1)
#initial condition
V0 = [1.0, 1.0]
# Priors for unknown model parameters
sigma = pymc.Uniform('sigma', 0.0, 100.0)
gama= pymc.Uniform('gama', 0.0, 20.0)
omega=pymc.Uniform('omega',0.0, 20.0)
#Solve the equations
@pymc.deterministic
def DHOS(gama=gama, omega=omega):
V1= np.zeros(npts+1)
V2= np.zeros(npts+1)
V1[0] = V0[0]
V2[0] = V0[1]
for i in range(1,npts+1):
V1[i]= V1[i-1] + dt*V2[i-1];
V2[i] = V2[i-1] + dt*(-((2*np.pi*omega)**2)*V1[i-1]-gama*V2[i-1]);
return [V1, V2]
#or we can use odeint
#@pymc.deterministic
#def DHS( gama=gama, omega=omega):
# def DOS_func(y, time):
# V1, V2 = y[0], y[1]
# dV1dt = V2
# dV2dt = -((2*np.pi*omega)**2)* V1 -gama*V2
# dydt = [dV1dt, dV2dt]
# return dydt
# soln = odeint(DOS_func,V0, time)
# V1, V2 = soln[:,0], soln[:,1]
# return V1, V2
# value of outcome (observations)
V1 = pymc.Lambda('V1', lambda DHOS=DHOS: DHOS[0])
V2 = pymc.Lambda('V2', lambda DHOS=DHOS: DHOS[1])
# liklihood of observations
Yobs1 = pymc.Normal('Yobs1', mu=V1, tau=1.0/sigma**2, value=ydata1, observed=True)
Yobs2 = pymc.Normal('Yobs2', mu=V2, tau=1.0/sigma**2, value=ydata2, observed=True)
Run Code Online (Sandbox Code Playgroud)
通过将上面的代码另存为DampedOscil_model.py,我们可以如下运行PYMC
import pymc
import DampedOscil_model
MDL = pymc.MCMC(DampedOscil_model, db='pickle')
MDL.sample(iter=1e4, burn=1e2, thin=2)
gama_trace=MDL.trace('gama')[- 1000:]
omega_trace=MDL.trace('omega')[-1000:]
gama=MDL.gama.value
omega=MDL.omega.value
Run Code Online (Sandbox Code Playgroud)
而且效果很好(请参见下文)。
由gama_true = 2.0和omega_est = 1.5构成的真实信号与估计信号的关系。估计的参数值为gama_est = 2.04和omega_est = 1.49
现在,我将这段代码转换为PYMC3以使用NUTS和ADVI。
import matplotlib.pyplot as plt
import scipy.io as sio
import pandas as pd
import numpy as np
import pymc3 as pm
import theano.tensor as tt
import theano
from pymc3 import Model, Normal, HalfNormal, Uniform
from pymc3 import NUTS, find_MAP, sample, Slice, traceplot, summary
from pymc3 import Deterministic
from scipy.optimize import fmin_powell
#import data
xdata = sio.loadmat('T.mat')['T'][0] #time
ydata1 = sio.loadmat('V1.mat')['V1'][0] # V2=dV1/dt, (X=V1),
ydata2 = sio.loadmat('V2.mat')['V2'][0] # dV2/dt=-(2pi*omega)^2*V1-gama*V2
#time span for solving the equations
npts= 500
dt=0.01
Tspan=5.0
time = np.linspace(0,Tspan,npts+1)
niter=10000
burn=niter//2;
with pm.Model() as model:
#Priors for unknown model parameters
sigma = pm.HalfNormal('sigma', sd=1)
gama= pm.Uniform('gama', 0.0, 20.0)
omega=pm.Uniform('omega',0.0, 20.0)
#initial condition
V0 = [1.0, 1.0]
#Solve the equations
# do I need to use theano.tensor here?!
@theano.compile.ops.as_op(itypes=[tt.dscalar, tt.dscalar],otypes=[tt.dvector])
def DHOS(gama=gama, omega=omega):
V1= np.zeros(npts+1)
V2= np.zeros(npts+1)
V1[0] = V0[0]
V2[0] = V0[1]
for i in range(1,npts+1):
V1[i]= V1[i-1] + dt*V2[i-1];
V2[i] = V2[i-1] + dt*(-((2*np.pi*1)**2)*V1[i-1]-gama*V2[i-1]);
return V1,V2
V1 = pm.Deterministic('V1', DHOS[0])
V2 = pm.Deterministic('V2', DHOS[1])
start = pm.find_MAP(fmin=fmin_powell, disp=True)
step=pm.NUTS()
trace=pm.sample(niter, step, start=start, progressbar=False)
traceplot(trace);
Summary=pm.df_summary(trace[-1000:])
gama_trace = trace.get_values('gama', burn)
omega_trace = trace.get_values('omega', burn)
Run Code Online (Sandbox Code Playgroud)
对于此代码,我得到以下错误:V1 = pm.Deterministic('V1',DHOS [0])
TypeError: 'FromFunctionOp' object does not support indexing
Run Code Online (Sandbox Code Playgroud)
简而言之,我想知道如何将PYMC代码的以下部分转换为PYMC3。
@pymc.deterministic
def DOS(gama=gama, omega=omega):
V1= np.zeros(npts+1)
V2= np.zeros(npts+1)
V1[0] = V0[0]
V2[0] = V0[1]
for i in range(1,npts+1):
V1[i]= V1[i-1] + dt*V2[i-1];
V2[i] = V2[i-1] + dt*(-((2*np.pi*omega)**2)*V1[i-1]-gama*V2[i-1]);
return [V1, V2]
V1 = pymc.Lambda('V1', lambda DOS=DOS: DOS[0])
V2 = pymc.Lambda('V2', lambda DOS=DOS: DOS[1])
Run Code Online (Sandbox Code Playgroud)
问题是,首先,PYMC3中确定性函数的论证与PYMC不同,其次,PYMC3中没有Lambda函数。
感谢您在解决PYMC3中的ODE方面的帮助,以解决生物系统中的参数估计任务(从数据估计方程参数)。
在此先感谢您的帮助。
亲切的问候,
梅萨姆
我建议并已成功实施,使用“黑匣子”方法与 PYMC3 进行交互。在这种情况下,这意味着您自己计算对数似然,然后使用 PYMC3 对其进行采样。这需要以 Theano 和 PYMC3 可以与它们交互的方式编写函数。
PYMC3 页面上的笔记本对此进行了概述,该笔记本使用 cython 作为示例。
这是需要完成的事情的简短示例。
首先,您可以加载数据并设置您需要的任何参数,例如时间步长等。
import pymc3 as pm
import numpy as np
import theano
import theano.tensor as tt
#import data
xdata = sio.loadmat('T.mat')['T'][0] #time
ydata1 = sio.loadmat('V1.mat')['V1'][0] # V2=dV1/dt, (X=V1),
ydata2 = sio.loadmat('V2.mat')['V2'][0] # dV2/dt=-(2pi*omega)^2*V1-gama*V2
#time span for solving the equations
npts= 500
dt=0.01
Tspan=5.0
time = np.linspace(0,Tspan,npts+1)
#initial condition
V0 = [1.0, 1.0]
Run Code Online (Sandbox Code Playgroud)
然后像以前一样定义一个数据生成函数,但不需要为此使用 PYMC 的任何装饰器。该函数的输出应该是您需要与数据进行比较以计算可能性的任何内容。
def DHOS(theta):
gama,omega=theta
V1= np.zeros(npts+1)
V2= np.zeros(npts+1)
V1[0] = V0[0]
V2[0] = V0[1]
for i in range(1,npts+1):
V1[i]= V1[i-1] + dt*V2[i-1];
V2[i] = V2[i-1] + dt*(-((2*np.pi*omega)**2)*V1[i-1]-gama*V2[i-1]);
return [V1, V2]
Run Code Online (Sandbox Code Playgroud)
接下来,您编写一个函数,调用前一个函数并使用您想要的任何分布(在此为正态分布)计算可能性。
def my_loglike(theta,data,sigma):
"""
A Gaussian log-likelihood function for a model with parameters given in theta
"""
model = DHOS(theta) #V1 and V2 from the DHOS function
#Here data = [ydata1,ydata2] to compare with model
#sigma is either the same shape as model or a scalar
#which corresponds to the uncertainty on the data.
return -(0.5)*sum((data - model)**2/sigma**2)
Run Code Online (Sandbox Code Playgroud)
从这里开始,您现在必须定义一个 Theano 类,以便它可以与 PYMC3 交互。
# define a theano Op for our likelihood function
class LogLike(tt.Op):
"""
Specify what type of object will be passed and returned to the Op when it is
called. In our case we will be passing it a vector of values (the parameters
that define our model) and returning a single "scalar" value (the
log-likelihood)
"""
itypes = [tt.dvector] # expects a vector of parameter values when called
otypes = [tt.dscalar] # outputs a single scalar value (the log likelihood)
def __init__(self, loglike, data, sigma):
"""
Initialise the Op with various things that our log-likelihood function
requires. Below are the things that are needed in this particular
example.
Parameters
----------
loglike:
The log-likelihood (or whatever) function we've defined
data:
The "observed" data that our log-likelihood function takes in
x:
The dependent variable (aka 'x') that our model requires
sigma:
The noise standard deviation that our function requires.
"""
# add inputs as class attributes
self.likelihood = loglike
self.data = data
self.sigma = sigma
def perform(self, node, inputs, outputs):
# the method that is used when calling the Op
theta, = inputs # this will contain my variables
# call the log-likelihood function
logl = self.likelihood(theta, self.data, self.sigma)
outputs[0][0] = array(logl) # output the log-likelihood
Run Code Online (Sandbox Code Playgroud)
最后,您可以使用 PYMC3 来构建模型并相应地进行采样。
ndraws = 10000 # number of draws from the distribution
nburn = 1000 # number of "burn-in points" (which we'll discard)
# create our Op
logl = LogLike(my_loglike, rdat_sim, 10)
# use PyMC3 to sampler from log-likelihood
with pm.Model():
gama= pm.Uniform('gama', 0.0, 20.0)
omega=pm.Uniform('omega',0.0, 20.0)
# convert m and c to a tensor vector
theta = tt.as_tensor_variable([gama,omega])
# use a DensityDist (use a lamdba function to "call" the Op)
pm.DensityDist('likelihood', lambda v: logl(v), observed={'v': theta})
trace = pm.sample(ndraws, tune=nburn, discard_tuned_samples=True)
Run Code Online (Sandbox Code Playgroud)
您可以使用内部绘图来查看采样结果
_ = pm.traceplot(trace)
Run Code Online (Sandbox Code Playgroud)
这只是根据链接中的示例笔记本改编的,正如那里提到的,如果您想使用 NUTS,您需要梯度信息,而您没有为自定义函数提供梯度信息。在链接中,它讨论了如何对梯度进行采样并构造它,以便您可以将其传递到采样器中,但我没有在这里展示。
此外,如果您想使用solve_ivp(或odeint或其他求解器),您所要做的就是更改DHOS函数,就像通常调用求解器一样。代码的其余部分应该可以移植到您或其他任何人需要的任何问题。
| 归档时间: |
|
| 查看次数: |
650 次 |
| 最近记录: |