我写了这样的示例代码.
// d_ptr will point to an array of N int* entries; that array is allocated
// with cudaMalloc, so the pointer table itself lives in device memory.
int ** d_ptr;
cudaMalloc( (void**)&d_ptr, sizeof(int*)*N );
// Host-side array that receives the N device pointers returned by cudaMalloc
// (one buffer of SIZE ints per entry).
int* tmp_ptr[N];
for(int i=0; i<N; i++)
cudaMalloc( (void**)&tmp_ptr[i], sizeof(int)*SIZE );
// Upload the pointer table from the host array into the device array.
// sizeof(tmp_ptr) is the size of the whole array, i.e. N * sizeof(int*).
cudaMemcpy(d_ptr, tmp_ptr, sizeof(tmp_ptr), cudaMemcpyHostToDevice);
Run Code Online (Sandbox Code Playgroud)
这段代码运行良好,但在内核启动后我无法把结果取回主机.
// First attempt: a single cudaMemcpy of sizeof(int)*N*SIZE bytes from d_ptr
// into the host array of N pointers.
int* Mtx_on_GPU[N];
cudaMemcpy(Mtx_on_GPU, d_ptr, sizeof(int)*N*SIZE, cudaMemcpyDeviceToHost);
Run Code Online (Sandbox Code Playgroud)
此时,发生了段错误(segmentation fault).但我不知道我哪里做错了.
// Second attempt: one cudaMemcpy per row, using Mtx_on_GPU[i] as the
// destination and d_ptr[i] (indexed from host code) as the source.
int* Mtx_on_GPU[N];
for(int i=0; i<N; i++)
cudaMemcpy(Mtx_on_GPU[i], d_ptr[i], sizeof(int)*SIZE, cudaMemcpyDeviceToHost);
Run Code Online (Sandbox Code Playgroud)
此代码也有相同的错误.
我认为我的代码肯定有错误,但我花了一整天也没能找到它.
给我一些建议.
在最后一行
cudaMemcpy(Mtx_on_GPU[i], d_ptr[i], sizeof(int)*SIZE, cudaMemcpyDeviceToHost);
Run Code Online (Sandbox Code Playgroud)
您正在尝试将数据从设备复制到主机(注意:我假设您为Mtx_on_GPU指针分配了主机内存!)
但是,d_ptr 中的指针存储在设备内存中,因此您无法直接从主机端访问 d_ptr[i].这一行应该是
// tmp_ptr is the host-side array that holds the device pointers, so
// tmp_ptr[i] can be read on the host and used as the copy source.
cudaMemcpy(Mtx_on_GPU[i], tmp_ptr[i], sizeof(int)*SIZE, cudaMemcpyDeviceToHost);
Run Code Online (Sandbox Code Playgroud)
使用"过于复杂"的变量名称时,这可能会变得更加清晰:
// Table of N device pointers; the table itself is allocated with cudaMalloc,
// so it lives in device memory and cannot be indexed from host code.
int ** devicePointersStoredInDeviceMemory;
cudaMalloc( (void**)&devicePointersStoredInDeviceMemory, sizeof(int*)*N);
// Host array that receives the N device pointers returned by cudaMalloc
// (one buffer of SIZE ints per entry). The host can read these entries.
int* devicePointersStoredInHostMemory[N];
for(int i=0; i<N; i++)
cudaMalloc( (void**)&devicePointersStoredInHostMemory[i], sizeof(int)*SIZE );
// Upload the pointer table (N pointers) from the host array to the device array.
cudaMemcpy(
devicePointersStoredInDeviceMemory,
devicePointersStoredInHostMemory,
sizeof(int*)*N, cudaMemcpyHostToDevice);
// Invoke kernel here, passing "devicePointersStoredInDeviceMemory"
// as an argument
...
// Copy each of the N device buffers back into its own host buffer.
int* hostPointersStoredInHostMemory[N];
for(int i=0; i<N; i++) {
    // Allocate the host buffer for row i and store it in the array, so the
    // destination pointer is valid before the copy and reachable afterwards.
    hostPointersStoredInHostMemory[i] = (int*)malloc(sizeof(int)*SIZE);
    int* hostPointer = hostPointersStoredInHostMemory[i];
    // Take the device pointer from the HOST copy of the pointer table; the
    // device-resident table cannot be indexed from host code.
    int* devicePointer = devicePointersStoredInHostMemory[i];
    cudaMemcpy(hostPointer, devicePointer, sizeof(int)*SIZE, cudaMemcpyDeviceToHost);
}
Run Code Online (Sandbox Code Playgroud)
编辑回应评论:
d_ptr 是一个"指针数组".但是这个数组的内存是用 cudaMalloc 分配的,这意味着它位于设备上.与此相反,通过 int* Mtx_on_GPU[N]; 你是在主机内存中"分配"了 N 个指针.你也可以使用 malloc 来分配,而不是直接指定数组大小.比较以下两种分配方式时可能会更清楚:
// Pointer table allocated with cudaMalloc: its N entries live in device memory.
int** pointersStoredInDeviceMemory;
cudaMalloc((void**)&pointersStoredInDeviceMemory, sizeof(int*)*N);
// Pointer table allocated with malloc: its N entries live in host memory.
int** pointersStoredInHostMemory;
// Cast to int** so the result matches the variable's type
// (assigning a void** to an int** does not compile as C++).
pointersStoredInHostMemory = (int**)malloc(N * sizeof(int*));
// This is not possible, because the array was allocated with cudaMalloc:
int *pointerA = pointersStoredInDeviceMemory[0];
// This is possible because the array was allocated with malloc:
int *pointerB = pointersStoredInHostMemory[0];
Run Code Online (Sandbox Code Playgroud)
跟踪这些指针究竟存放在哪里可能有点烧脑,
但幸运的是,间接层级几乎不会超过 2 层.
| 归档时间: |
|
| 查看次数: |
4355 次 |
| 最近记录: |