Nef*_*rin 5 python performance interpolation numpy cython
我有以下代码,用于对 3D 体积数据进行插值。
# Resample `volume` at the coordinates stored in `result`, using linear
# interpolation on the regular integer grid of voxel indices.
Y, X, Z = np.shape(volume)
xs = np.arange(0, X)
ys = np.arange(0, Y)
zs = np.arange(0, Z)

# The grid axes are ordered (ys, xs, zs), so components 1, 0, 2 of the last
# axis of `result` are selected in that order.  A single fancy-index plus
# reshape yields the same (N, 3) points, in the same C order, as zipping the
# three raveled components -- without materialising one Python tuple per
# voxel, which dominated the non-interpolation runtime.
points = result[..., [1, 0, 2]].reshape(-1, 3)

# Points outside the grid evaluate to 0 instead of raising.
interp = interpolate.RegularGridInterpolator(
    (ys, xs, zs),
    volume,
    bounds_error=False,
    fill_value=0,
    method='linear',
)
new_volume = interp(points)
new_volume = np.reshape(new_volume, (Y, X, Z))
Run Code Online (Sandbox Code Playgroud)
这段代码在 512x512x110 的体积(约 2900 万个点)上执行大约需要 37 秒,相当于每个体素耗时超过一微秒(这对我来说是不可接受的——更何况它已经用上了 4 个内核)。调用 new_volume = interp(points)
占用了整个过程(procedure)约 80% 的时间,而创建列表几乎占用了其余的全部时间。
是否有任何简单(甚至更复杂)的方法可以使此计算更快?或者有什么好的 Python 库可以提供更快的插值?每次调用此过程时,我的体积数据和插值点都会发生变化。
这是您的cython
解决方案的稍微修改版本:
import numpy as np
cimport numpy as np
from libc.math cimport floor
from cython cimport boundscheck, wraparound, nonecheck, cdivision
# Element type shared by every array handled in this module: 64-bit floats.
# `np.float` was merely an alias of the builtin `float` (i.e. float64) and was
# removed in NumPy 1.24, so the explicit 64-bit names are used instead.
DTYPE = np.float64
ctypedef np.float64_t DTYPE_t
@boundscheck(False)
@wraparound(False)
@nonecheck(False)
def interp3D(DTYPE_t[:,:,::1] v not None, DTYPE_t[:,:,::1] xs not None,
             DTYPE_t[:,:,::1] ys not None, DTYPE_t[:,:,::1] zs not None):
    """Trilinearly interpolate the volume ``v`` at one query point per voxel.

    Parameters
    ----------
    v : C-contiguous 3-D array of DTYPE
        Source volume.
    xs, ys, zs : C-contiguous 3-D arrays of DTYPE, same shape as ``v``
        Fractional index coordinates along axis 0, 1 and 2 of ``v``
        respectively; element ``[i, j, k]`` of the three arrays together
        describes the sample position for output voxel ``[i, j, k]``.

    Returns
    -------
    numpy.ndarray
        Array with the shape and dtype of ``v`` holding the interpolated
        samples.

    Raises
    ------
    ValueError
        If any coordinate array does not have the same shape as ``v``.
    """
    cdef int X, Y, Z
    cdef np.ndarray[DTYPE_t, ndim=3] interpolated
    X, Y, Z = v.shape[0], v.shape[1], v.shape[2]

    # The helper walks all four buffers with raw pointers for X*Y*Z elements
    # and bounds checking is disabled, so a shape mismatch would read past
    # the end of the shorter buffer.
    if (xs.shape[0] != X or xs.shape[1] != Y or xs.shape[2] != Z
            or ys.shape[0] != X or ys.shape[1] != Y or ys.shape[2] != Z
            or zs.shape[0] != X or zs.shape[1] != Y or zs.shape[2] != Z):
        raise ValueError("xs, ys and zs must have the same shape as v")

    interpolated = np.zeros((X, Y, Z), dtype=DTYPE)

    # Nothing to sample; also avoids taking the address of element [0,0,0]
    # of an empty buffer.
    if X == 0 or Y == 0 or Z == 0:
        return interpolated

    _interp3D(&v[0,0,0], &xs[0,0,0], &ys[0,0,0], &zs[0,0,0], &interpolated[0,0,0], X, Y, Z)
    return interpolated
@cdivision(True)
cdef inline void _interp3D(DTYPE_t *v, DTYPE_t *x_points, DTYPE_t *y_points, DTYPE_t *z_points,
                           DTYPE_t *result, int X, int Y, int Z):
    """Trilinear interpolation kernel over flat C-ordered buffers.

    ``v`` is an X*Y*Z volume stored in C order.  ``x_points``, ``y_points``
    and ``z_points`` each hold X*Y*Z fractional index coordinates, and
    ``result`` receives one interpolated sample per coordinate triple.
    Query points outside ``[0, X-1] x [0, Y-1] x [0, Z-1]`` (including NaN
    coordinates) produce 0.
    """
    cdef:
        # Offsets are Py_ssize_t so that large volumes do not overflow a C int.
        Py_ssize_t i, dim, stride_x, stride_y
        Py_ssize_t ox0, ox1, oy0, oy1
        int x0, x1, y0, y1, z0, z1
        DTYPE_t x, y, z, xd, yd, zd, c00, c01, c10, c11, c0, c1, c

    stride_y = Z
    stride_x = <Py_ssize_t>Y * Z
    dim = stride_x * X

    for i in range(dim):
        x = x_points[i]
        y = y_points[i]
        z = z_points[i]

        # Range check happens on the floating-point values *before* the
        # cast to int: it rejects NaN (every comparison is false) and values
        # too large for an int, and it covers the upper bound, which the
        # lower-bound-only test missed (leading to out-of-bounds reads).
        if not (0 <= x <= X - 1 and 0 <= y <= Y - 1 and 0 <= z <= Z - 1):
            result[i] = 0
            continue

        x0 = <int>floor(x)
        y0 = <int>floor(y)
        z0 = <int>floor(z)

        # On the last grid plane there is no "+1" neighbour.  The fractional
        # part is exactly 0 there, so reusing the same index leaves the
        # result unchanged while keeping every read inside the buffer.
        x1 = x0 + 1 if x0 < X - 1 else x0
        y1 = y0 + 1 if y0 < Y - 1 else y0
        z1 = z0 + 1 if z0 < Z - 1 else z0

        # Neighbouring grid points are one index apart, so the normalised
        # distance is simply the fractional part.
        xd = x - x0
        yd = y - y0
        zd = z - z0

        ox0 = stride_x * x0
        ox1 = stride_x * x1
        oy0 = stride_y * y0
        oy1 = stride_y * y1

        # Interpolate along x, then y, then z.
        c00 = v[ox0 + oy0 + z0]*(1-xd) + v[ox1 + oy0 + z0]*xd
        c01 = v[ox0 + oy0 + z1]*(1-xd) + v[ox1 + oy0 + z1]*xd
        c10 = v[ox0 + oy1 + z0]*(1-xd) + v[ox1 + oy1 + z0]*xd
        c11 = v[ox0 + oy1 + z1]*(1-xd) + v[ox1 + oy1 + z1]*xd

        c0 = c00*(1-yd) + c10*yd
        c1 = c01*(1-yd) + c11*yd

        c = c0*(1-zd) + c1*zd
        result[i] = c
Run Code Online (Sandbox Code Playgroud)
结果仍然和你的一样。使用 60x60x60 的随机网格数据,
我得到以下耗时:
SciPy's solution: 982ms
Your cython solution: 24.7ms
Above modified cython solution: 8.17ms
Run Code Online (Sandbox Code Playgroud)
所以它比您的 cython 解决方案快了近 4 倍。
注意这里使用了 @boundscheck(False)。
您还可以尝试把 for 循环中的 range 换成 prange。
希望这可以帮助。
归档时间: |
|
查看次数: |
3298 次 |
最近记录: |