kyn*_*nan 65 python memory buffer memory-management cython
有关类型化内存视图的Cython文档列出了三种分配给类型化内存视图的方法:
1. 来自原始C指针;2. 来自np.ndarray;3. 来自cython.view.array.假设数据不是从外部传入我的Cython函数,而是我想自己分配内存并将其作为np.ndarray返回,那么我应该选择哪个选项?另外假设该缓冲区的大小不是编译时常量,即我无法在栈上分配,对于选项1需要使用malloc.
因此,3个选项可以解释如下:
from libc.stdlib cimport malloc, free
cimport numpy as np
from cython cimport view
np.import_array()
def memview_malloc(int N):
    # Option 1: allocate room for N ints with malloc and wrap the raw pointer
    # in a typed memoryview through a pointer-to-array cast.
    # NOTE(review): the malloc result is not checked for NULL here.
    cdef int * m = <int *>malloc(N * sizeof(int))
    cdef int[::1] b = <int[:N]>m
    # The buffer is released by hand; `b` was built from the same pointer, so
    # it must not be used past this point.
    free(<void *>m)
def memview_ndarray(int N):
    # Option 2: have NumPy allocate an N-element int32 array and assign it to
    # a typed memoryview of ints.
    cdef int[::1] b = np.empty(N, dtype=np.int32)
def memview_cyarray(int N):
    # Option 3: allocate through cython.view.array (one dimension of N items,
    # each sizeof(int) bytes, format code "i") and assign it to a typed
    # memoryview of ints.
    cdef int[::1] b = view.array(shape=(N,), itemsize=sizeof(int), format="i")
让我感到惊讶的是,在所有三种情况下,Cython为内存分配生成了大量代码,特别是调用__Pyx_PyObject_to_MemoryviewSlice_dc_int.这表明(我可能在这里错了,我对Cython内部工作的洞察力非常有限),它首先创建一个Python对象,然后将其"转换"到内存视图中,这似乎是不必要的开销.
一个简单的基准测试并未显示三种方法之间存在很大差异,其中方法2以微弱优势成为最快的方法.
推荐三种方法中的哪一种?或者有更好的选择吗?
后续问题:在函数中使用该内存视图之后,我想最终把结果作为np.ndarray返回.类型化内存视图是最佳选择吗?还是我应该像下面这样,一开始就使用旧的缓冲区接口来创建一个ndarray?
cdef np.ndarray[DTYPE_t, ndim=1] b = np.empty(N, dtype=np.int32)
Vee*_*rac 67
看看这里的回答.
基本的想法是:你需要的是cpython.array.array和cpython.array.clone(而不是cython.array.*):
from cpython.array cimport array, clone
# This type is what you want and can be cast to things of
# the "double[:]" syntax, so no problems there
cdef array[double] armv, templatemv
# Empty array of type code 'd', used only as the template for clone().
templatemv = array('d')
# This is fast: clone() creates a new array of length L with the template's
# type; the trailing False asks it not to zero-initialise the contents.
# (L is not defined in this snippet; it stands for the desired length.)
armv = clone(templatemv, L, False)
编辑
事实证明,该讨论帖中的基准测试毫无价值.以下是我的测试设置和我的计时结果:
# cython: language_level=3
# cython: boundscheck=False
# cython: wraparound=False
import time
import sys
from cpython.array cimport array, clone
from cython.view cimport array as cvarray
from libc.stdlib cimport malloc, free
import numpy as numpy
cimport numpy as numpy
# Repetition count shared by the whole file: assigned before each benchmark
# section, read inside every benchmark body, and used by timefunc to turn the
# total elapsed time into a per-repetition figure.
cdef int loops
def timefunc(name):
    # Decorator factory for the benchmarks in this file.
    #
    # name: label printed before the timings of the decorated benchmark.
    #
    # The returned decorator runs `f(L)` immediately, once per size L from 1
    # to 1000000 in powers of ten, and prints the elapsed time divided by the
    # module-level `loops` counter, scaled by 1e6 (microseconds per loop).
    # It returns None, so the decorated name is rebound to None; the
    # benchmarks exist only for their printed timings.
    def timedecorator(f):
        cdef int L
        print("Running", name)
        for L in [1, 10, 100, 1000, 10000, 100000, 1000000]:
            # time.clock() was removed in Python 3.8; perf_counter() is the
            # high-resolution clock intended for timing short intervals.
            start = time.perf_counter()
            f(L)
            end = time.perf_counter()
            print(format((end-start) / loops * 1e6, "2f"), end=" ")
            # Flush so each figure appears as soon as its run finishes.
            sys.stdout.flush()
        # Unit of the figures printed above: seconds per loop * 1e6.
        print("μs")
    return timedecorator
print()
print("INITIALISATIONS")
# Every initialisation benchmark below repeats its allocation this many times
# per timed call.
loops = 100000
@timefunc("cpython.array buffer")
def _(int L):
    # Initialisation benchmark: `loops` times, clone an L-element array from a
    # 'd' template into a variable declared with the array[double] buffer type.
    cdef int i
    cdef array[double] arr, template = array('d')
    for i in range(loops):
        arr = clone(template, L, False)
    # Prevents dead code elimination
    str(arr[0])
@timefunc("cpython.array memoryview")
def _(int L):
    # Initialisation benchmark: `loops` times, clone an L-element array from a
    # 'd' template and assign it to a double[::1] typed memoryview.
    cdef int i
    cdef double[::1] arr
    cdef array template = array('d')
    for i in range(loops):
        arr = clone(template, L, False)
    # Prevents dead code elimination
    str(arr[0])
@timefunc("cpython.array raw C type")
def _(int L):
    # Initialisation benchmark: `loops` times, clone an L-element array from a
    # 'd' template into a variable typed as plain `array` (no buffer type and
    # no memoryview).
    cdef int i
    cdef array arr, template = array('d')
    for i in range(loops):
        arr = clone(template, L, False)
    # Prevents dead code elimination
    str(arr[0])
@timefunc("numpy.empty_like memoryview")
def _(int L):
    # Initialisation benchmark: `loops` times, call numpy.empty_like on an
    # L-element 'double' template and assign the result to a double[::1]
    # typed memoryview.  The template is built once, outside the timed loop
    # body.
    cdef int i
    cdef double[::1] arr
    template = numpy.empty((L,), dtype='double')
    for i in range(loops):
        arr = numpy.empty_like(template)
    # Prevents dead code elimination
    str(arr[0])
@timefunc("malloc")
def _(int L):
    # Initialisation benchmark: `loops` malloc/free pairs, each for L doubles.
    cdef int i
    cdef double* arrptr
    cdef double first
    for i in range(loops):
        arrptr = <double*> malloc(sizeof(double) * L)
        free(arrptr)
    # Prevents dead code elimination.  Every pointer produced by the loop has
    # already been freed, so the read is done on one extra allocation that is
    # still live, and that allocation is released only after the read.
    arrptr = <double*> malloc(sizeof(double) * L)
    if arrptr == NULL:
        raise MemoryError()
    first = arrptr[0]
    free(arrptr)
    str(first)
@timefunc("malloc memoryview")
def _(int L):
    # Initialisation benchmark: `loops` times, malloc L doubles, wrap the raw
    # pointer in a double[::1] typed memoryview, then free the buffer.
    cdef int i
    cdef double* arrptr
    cdef double[::1] arr
    cdef double first
    for i in range(loops):
        arrptr = <double*> malloc(sizeof(double) * L)
        arr = <double[:L]>arrptr
        free(arrptr)
    # Prevents dead code elimination.  The view left over from the loop points
    # at freed memory, so the read goes through a view over one extra
    # allocation that is still live; the buffer is freed only after the read
    # and the view is not touched again afterwards.
    arrptr = <double*> malloc(sizeof(double) * L)
    if arrptr == NULL:
        raise MemoryError()
    arr = <double[:L]>arrptr
    first = arr[0]
    free(arrptr)
    str(first)
@timefunc("cvarray memoryview")
def _(int L):
    # Initialisation benchmark: `loops` times, build a cython.view.array of L
    # items (itemsize sizeof(double), format 'd') and assign it to a
    # double[::1] typed memoryview.
    cdef int i
    cdef double[::1] arr
    for i in range(loops):
        arr = cvarray((L,),sizeof(double),'d')
    # Prevents dead code elimination
    str(arr[0])
print()
print("ITERATING")
# Every iteration benchmark below repeats its full pass over the L elements
# this many times per timed call.
loops = 1000
@timefunc("cpython.array buffer")
def _(int L):
    # Iteration benchmark: read every element of an L-element array through a
    # variable declared with the array[double] buffer type, `loops` times.
    # The array is created once, outside the timed loops.
    cdef int i
    cdef array[double] arr = clone(array('d'), L, False)
    cdef double d
    # NOTE(review): the outer and inner loops share the counter `i` --
    # confirm the generated loops keep independent counters.
    for i in range(loops):
        for i in range(L):
            d = arr[i]
    # Prevents dead-code elimination
    str(d)
@timefunc("cpython.array memoryview")
def _(int L):
    # Iteration benchmark: read every element of an L-element cloned array
    # through a double[::1] typed memoryview, `loops` times.
    cdef int i
    cdef double[::1] arr = clone(array('d'), L, False)
    cdef double d
    # NOTE(review): the outer and inner loops share the counter `i` --
    # confirm the generated loops keep independent counters.
    for i in range(loops):
        for i in range(L):
            d = arr[i]
    # Prevents dead-code elimination
    str(d)
@timefunc("cpython.array raw C type")
def _(int L):
    # Iteration benchmark: read every element of an L-element cloned array
    # through a variable typed as plain `array` (no buffer type and no
    # memoryview), `loops` times.
    cdef int i
    cdef array arr = clone(array('d'), L, False)
    cdef double d
    # NOTE(review): the outer and inner loops share the counter `i` --
    # confirm the generated loops keep independent counters.
    for i in range(loops):
        for i in range(L):
            d = arr[i]
    # Prevents dead-code elimination
    str(d)
@timefunc("numpy.empty_like memoryview")
def _(int L):
    # Iteration benchmark: read every element of an L-element NumPy 'double'
    # array through a double[::1] typed memoryview, `loops` times.
    # The label matches the initialisation benchmark of the same name; this
    # body allocates with numpy.empty.
    cdef int i
    cdef double[::1] arr = numpy.empty((L,), dtype='double')
    cdef double d
    # NOTE(review): the outer and inner loops share the counter `i` --
    # confirm the generated loops keep independent counters.
    for i in range(loops):
        for i in range(L):
            d = arr[i]
    # Prevents dead-code elimination
    str(d)
@timefunc("malloc")
def _(int L):
    # Iteration benchmark: read every element of a malloc'd block of L doubles
    # through the raw pointer, `loops` times, then free the block.
    # NOTE(review): the malloc result is not checked for NULL here.
    cdef int i
    cdef double* arrptr = <double*> malloc(sizeof(double) * L)
    cdef double d
    # NOTE(review): the outer and inner loops share the counter `i` --
    # confirm the generated loops keep independent counters.
    for i in range(loops):
        for i in range(L):
            d = arrptr[i]
    free(arrptr)
    # Prevents dead-code elimination
    str(d)
@timefunc("malloc memoryview")
def _(int L):
    # Iteration benchmark: wrap a malloc'd block of L doubles in a double[::1]
    # typed memoryview, read every element through the view `loops` times,
    # then free the block.
    cdef int i
    cdef double* arrptr = <double*> malloc(sizeof(double) * L)
    cdef double[::1] arr = <double[:L]>arrptr
    cdef double d
    # NOTE(review): the outer and inner loops share the counter `i` --
    # confirm the generated loops keep independent counters.
    for i in range(loops):
        for i in range(L):
            d = arr[i]
    # Freed only after the last read through `arr`.
    free(arrptr)
    # Prevents dead-code elimination
    str(d)
@timefunc("cvarray memoryview")
def _(int L):
    # Iteration benchmark: read every element of an L-item cython.view.array
    # through a double[::1] typed memoryview, `loops` times.
    cdef int i
    cdef double[::1] arr = cvarray((L,),sizeof(double),'d')
    cdef double d
    # NOTE(review): the outer and inner loops share the counter `i` --
    # confirm the generated loops keep independent counters.
    for i in range(loops):
        for i in range(L):
            d = arr[i]
    # Prevents dead-code elimination
    str(d)
输出:
INITIALISATIONS
Running cpython.array buffer
0.100040 0.097140 0.133110 0.121820 0.131630 0.108420 0.112160 μs
Running cpython.array memoryview
0.339480 0.333240 0.378790 0.445720 0.449800 0.414280 0.414060 μs
Running cpython.array raw C type
0.048270 0.049250 0.069770 0.074140 0.076300 0.060980 0.060270 μs
Running numpy.empty_like memoryview
1.006200 1.012160 1.128540 1.212350 1.250270 1.235710 1.241050 μs
Running malloc
0.021850 0.022430 0.037240 0.046260 0.039570 0.043690 0.030720 μs
Running malloc memoryview
1.640200 1.648000 1.681310 1.769610 1.755540 1.804950 1.758150 μs
Running cvarray memoryview
1.332330 1.353910 1.358160 1.481150 1.517690 1.485600 1.490790 μs
ITERATING
Running cpython.array buffer
0.010000 0.027000 0.091000 0.669000 6.314000 64.389000 635.171000 μs
Running cpython.array memoryview
0.013000 0.015000 0.058000 0.354000 3.186000 33.062000 338.300000 μs
Running cpython.array raw C type
0.014000 0.146000 0.979000 9.501000 94.160000 916.073000 9287.079000 μs
Running numpy.empty_like memoryview
0.042000 0.020000 0.057000 0.352000 3.193000 34.474000 333.089000 μs
Running malloc
0.002000 0.004000 0.064000 0.367000 3.599000 32.712000 323.858000 μs
Running malloc memoryview
0.019000 0.032000 0.070000 0.356000 3.194000 32.100000 327.929000 μs
Running cvarray memoryview
0.014000 0.026000 0.063000 0.351000 3.209000 32.013000 327.890000 μs
("迭代"基准的原因是某些方法在这方面具有惊人的不同特征.)
按初始化速度顺序:
malloc:这是一个严酷的世界,但它速度很快.如果你需要分配很多东西,并且要求迭代和索引性能不受任何拖累,那就只能用它.但通常情况下,更好的选择是...
cpython.array raw C type:该死的,它很快.这很安全.不幸的是,它通过Python访问其数据字段.你可以通过一个很好的技巧来避免这种情况:
arr.data.as_doubles[i]
这能让它达到标准速度,但代价是失去安全检查!这使得它成为malloc的绝佳替代品,基本上就是一个漂亮的带引用计数的版本!
cpython.array buffer:其设置时间只有malloc的三到四倍,看起来是个很棒的选择.不幸的是,它有明显的开销(尽管与boundscheck和wraparound指令带来的开销相比很小).这意味着它实际上只能与完全安全的变体竞争,但它是其中初始化最快的.由你选择.
cpython.array memoryview:现在它的初始化比malloc慢一个数量级.这很可惜,但它的迭代速度同样快.这是我建议的标准解决方案,除非boundscheck或wraparound处于开启状态(在这种情况下cpython.array buffer可能是更有吸引力的折衷).
其余的方法.其中唯一有价值的是numpy的方法,因为它的对象上附带了许多有用的方法.不过也就仅此而已.
| 归档时间: | 
 | 
| 查看次数: | 12714 次 | 
| 最近记录: |