Emr*_*koz 3 foreach cuda gpgpu thrust
我正在尝试使用CUDA的推力库在CUDA中修改一个简单的动态矢量.但是我在屏幕上出现"launch_closure_by_value"错误,表明错误与某些同步过程有关.
由于此错误,无法进行简单的1D动态数组修改.
导致错误的我的代码段如下.
从.cpp文件我调用setIndexedGrid,它在System.cu中定义
float* a= (float*)(malloc(8*sizeof(float)));
a[0]= 0; a[1]= 1; a[2]= 2; a[3]= 3; a[4]= 4; a[5]= 5; a[6]= 6; a[7]= 7;
float* b = (float*)(malloc(8*sizeof(float)));
setIndexedGridInfo(a,b);
Run Code Online (Sandbox Code Playgroud)
System.cu的代码段:
void
setIndexedGridInfo(float* a, float*b)
{
thrust::device_ptr<float> d_oldData(a);
thrust::device_ptr<float> d_newData(b);
float c = 0.0;
thrust::for_each(
thrust::make_zip_iterator(thrust::make_tuple(d_oldData,d_newData)),
thrust::make_zip_iterator(thrust::make_tuple(d_oldData+8,d_newData+8)),
grid_functor(c));
}
Run Code Online (Sandbox Code Playgroud)
grid_functor在_kernel.cu中定义
struct grid_functor
{
float a;
__host__ __device__
grid_functor(float grid_Info) : a(grid_Info) {}
template <typename Tuple>
__device__
void operator()(Tuple t)
{
volatile float data = thrust::get<0>(t);
float pos = data + 0.1;
thrust::get<1>(t) = pos;
}
};
Run Code Online (Sandbox Code Playgroud)
我也在Output窗口中获取这些(我使用Visual Studio):
Particles.exe中0x000007fefdc7cacd的第一次机会异常:Microsoft C++异常:内存位置为0x0029eb60的cudaError_enum .. smokeParticles.exe中0x000007fefdc7cacd的第一次机会异常:Microsoft C++异常:在内存位置0x0029ecf0的thrust :: system :: system_error .. Particles.exe中0x000007fefdc7cacd处的未处理异常:Microsoft C++异常:在内存位置0x0029ecf0的thrust :: system :: system_error ..
是什么导致了这个问题?
您正试图在期望设备内存中的指针的函数中使用主机内存指针.这段代码就是问题:
float* a= (float*)(malloc(8*sizeof(float)));
a[0]= 0; a[1]= 1; a[2]= 2; a[3]= 3; a[4]= 4; a[5]= 5; a[6]= 6; a[7]= 7;
float* b = (float*)(malloc(8*sizeof(float)));
setIndexedGridInfo(a,b);
.....
thrust::device_ptr<float> d_oldData(a);
thrust::device_ptr<float> d_newData(b);
Run Code Online (Sandbox Code Playgroud)
的thrust::device_ptr
是用于"包装"一个设备存储器指针与CUDA API分配,使得推力可以使用它.您正在尝试将主机指针直接视为设备指针.这是非法的.您可以setIndexedGridInfo
像这样修改您的功能:
void setIndexedGridInfo(float* a, float*b, const int n)
{
thrust::device_vector<float> d_oldData(a,a+n);
thrust::device_vector<float> d_newData(b,b+n);
float c = 0.0;
thrust::for_each(
thrust::make_zip_iterator(thrust::make_tuple(d_oldData.begin(),d_newData.begin())),
thrust::make_zip_iterator(thrust::make_tuple(d_oldData.end(),d_newData.end())),
grid_functor(c));
}
Run Code Online (Sandbox Code Playgroud)
该device_vector
构造函数将分配设备内存中,然后你的主机内存中的内容复制到设备.这应该可以解决你所看到的错误,虽然我不确定你要用for_each
迭代器做什么,以及你有没有这个算法是正确的.
编辑:
这是一个完整的,可编译的,可运行的代码版本:
#include <cstdlib>
#include <cstdio>
#include <thrust/device_vector.h>
#include <thrust/for_each.h>
#include <thrust/copy.h>
struct grid_functor
{
float a;
__host__ __device__
grid_functor(float grid_Info) : a(grid_Info) {}
template <typename Tuple>
__device__
void operator()(Tuple t)
{
volatile float data = thrust::get<0>(t);
float pos = data + 0.1f;
thrust::get<1>(t) = pos;
}
};
void setIndexedGridInfo(float* a, float*b, const int n)
{
thrust::device_vector<float> d_oldData(a,a+n);
thrust::device_vector<float> d_newData(b,b+n);
float c = 0.0;
thrust::for_each(
thrust::make_zip_iterator(thrust::make_tuple(d_oldData.begin(),d_newData.begin())),
thrust::make_zip_iterator(thrust::make_tuple(d_oldData.end(),d_newData.end())),
grid_functor(c));
thrust::copy(d_newData.begin(), d_newData.end(), b);
}
int main(void)
{
const int n = 8;
float* a= (float*)(malloc(n*sizeof(float)));
a[0]= 0; a[1]= 1; a[2]= 2; a[3]= 3; a[4]= 4; a[5]= 5; a[6]= 6; a[7]= 7;
float* b = (float*)(malloc(n*sizeof(float)));
setIndexedGridInfo(a,b,n);
for(int i=0; i<n; i++) {
fprintf(stdout, "%d (%f,%f)\n", i, a[i], b[i]);
}
return 0;
}
Run Code Online (Sandbox Code Playgroud)
我可以使用CUDA 4.1在OS 10.6.8主机上编译和运行此代码,如下所示:
$ nvcc -Xptxas="-v" -arch=sm_12 -g -G thrustforeach.cu
./thrustforeach.cu(18): Warning: Cannot tell what pointer points to, assuming global memory space
./thrustforeach.cu(20): Warning: Cannot tell what pointer points to, assuming global memory space
./thrustforeach.cu(18): Warning: Cannot tell what pointer points to, assuming global memory space
./thrustforeach.cu(20): Warning: Cannot tell what pointer points to, assuming global memory space
ptxas info : Compiling entry function '_ZN6thrust6detail7backend4cuda6detail23launch_closure_by_valueINS2_18for_each_n_closureINS_12zip_iteratorINS_5tupleINS0_15normal_iteratorINS_10device_ptrIfEEEESB_NS_9null_typeESC_SC_SC_SC_SC_SC_SC_EEEEi12grid_functorEEEEvT_' for 'sm_12'
ptxas info : Used 14 registers, 160+0 bytes lmem, 16+16 bytes smem, 4 bytes cmem[1]
ptxas info : Compiling entry function '_ZN6thrust6detail7backend4cuda6detail23launch_closure_by_valueINS2_18for_each_n_closureINS_12zip_iteratorINS_5tupleINS0_15normal_iteratorINS_10device_ptrIfEEEESB_NS_9null_typeESC_SC_SC_SC_SC_SC_SC_EEEEj12grid_functorEEEEvT_' for 'sm_12'
ptxas info : Used 14 registers, 160+0 bytes lmem, 16+16 bytes smem, 4 bytes cmem[1]
$ ./a.out
0 (0.000000,0.100000)
1 (1.000000,1.100000)
2 (2.000000,2.100000)
3 (3.000000,3.100000)
4 (4.000000,4.100000)
5 (5.000000,5.100000)
6 (6.000000,6.100000)
7 (7.000000,7.100000)
Run Code Online (Sandbox Code Playgroud)
归档时间: |
|
查看次数: |
3153 次 |
最近记录: |