Ave*_*ach 2 cuda memory-alignment
我的代码给出了一条错误消息,我试图找出它的原因.为了更容易找到问题,我删除了显然与导致错误消息无关的代码.如果您能告诉我为什么以下简单代码会产生错误消息,那么我认为我应该能够修复原始代码:
#include "cuComplex.h"
#include <cutil.h>
// Carves one untyped device buffer into consecutive typed sub-arrays using
// pointer arithmetic, then stores a complex zero into the first element of the
// slice of btplus_all that belongs to sample `isample`.
//
// data      base address of the packed buffer
// isample   index of the sample whose btplus slice is written
// nsamples  number of samples; scales the length of every per-sample sub-array
//
// NOTE(review): every sub-array starts exactly where the previous one ends, with
// no padding. The bytes in front of the int arrays are always a multiple of 16
// (104 complex values plus 102 * nsamples doubles), but the three int arrays
// hold 54 * nsamples ints = 216 * nsamples bytes, which is a multiple of 16 only
// when nsamples is even. For the nsamples = 101 passed by the caller that is
// 21816 bytes (8 mod 16), so every complex array after the int arrays starts on
// an 8-byte but not a 16-byte boundary. This presumably matters because
// cuDoubleComplex is a 16-byte-aligned double2 in cuComplex.h — confirm.
__device__ void compute_energy(void *data, int isample, int nsamples) {
// First sub-array: begins at the buffer base.
cuDoubleComplex * const nminusarray = (cuDoubleComplex*)data;
// Begins 101 complex elements after nminusarray.
cuDoubleComplex * const f = (cuDoubleComplex*)(nminusarray+101);
// Begins 3 complex elements after f; the element type switches to double here.
double * const abs_est_errorrow_all = (double*)(f+3);
// Begins nsamples*51 doubles after abs_est_errorrow_all.
double * const rel_est_errorrow_all = (double*)(abs_est_errorrow_all+nsamples*51);
// Begins nsamples*51 doubles after rel_est_errorrow_all; the element type switches to int.
int * const iid_all = (int*)(rel_est_errorrow_all+nsamples*51);
// Begins nsamples*21 ints after iid_all.
int * const iiu_all = (int*)(iid_all+nsamples*21);
// Begins nsamples*21 ints after iiu_all.
int * const piv_all = (int*)(iiu_all+nsamples*21);
// Begins nsamples*12 ints after piv_all; the element type switches back to complex.
cuDoubleComplex * const energyrow_all = (cuDoubleComplex*)(piv_all+nsamples*12);
// Begins nsamples*51 complex elements after energyrow_all.
cuDoubleComplex * const refinedenergyrow_all = (cuDoubleComplex*)(energyrow_all+nsamples*51);
// Begins nsamples*51 complex elements after refinedenergyrow_all.
cuDoubleComplex * const btplus_all = (cuDoubleComplex*)(refinedenergyrow_all+nsamples*51);
// Slice for this sample: consecutive samples are 21021 complex elements apart.
cuDoubleComplex * const btplus = btplus_all+isample*21021;
// The only memory access in this function: one complex store through btplus.
btplus[0] = make_cuDoubleComplex(0.0, 0.0);
}
// Kernel entry point. Each block forwards its own x block index as the sample
// index to compute_energy, with nlambda passed through as the sample count.
__global__ void computeLamHeight(void *data, int nlambda) {
    const int sampleIndex = blockIdx.x;
    compute_energy(data, sampleIndex, nlambda);
}
// Host driver for the reproduction: allocates the packed device buffer, launches
// computeLamHeight on 101 blocks of 512 threads with 45000 bytes of dynamic
// shared memory, waits for the kernel to finish and releases the buffer.
// Every CUDA runtime call is wrapped in CUDA_SAFE_CALL.
int main(int argc, char *argv[]) {
    void *device_data = 0;
    CUT_DEVICE_INIT(argc, argv);
    CUDA_SAFE_CALL(cudaMalloc(&device_data, 184465640));
    computeLamHeight<<<dim3(101, 1, 1), dim3(512, 1, 1), 45000>>>(device_data, 101);
    // A kernel launch returns no status itself; query it explicitly so that a
    // rejected launch configuration is reported at the launch, not later.
    CUDA_SAFE_CALL(cudaGetLastError());
    // Faults raised while the kernel runs surface at this synchronising call.
    CUDA_SAFE_CALL(cudaThreadSynchronize());
    // Release the device buffer instead of leaking it until process exit.
    CUDA_SAFE_CALL(cudaFree(device_data));
    return 0;
}
Run Code Online (Sandbox Code Playgroud)
我正在使用GeForce GTX 480,我正在编译代码:
nvcc -L /soft/cuda-sdk/4.0.17/C/lib -I /soft/cuda-sdk/4.0.17/C/common/inc -lcutil_x86_64 -arch sm_13 -O3 -Xopencc "-Wall" Main.cu
Run Code Online (Sandbox Code Playgroud)
输出是:
Using device 0: GeForce GTX 480
Cuda error in file 'Main.cu' in line 31 : unspecified launch failure.
Run Code Online (Sandbox Code Playgroud)
编辑:我现在进一步简化了代码.以下更简单的代码仍然会生成错误消息:
#include <cutil.h>
// Minimal reproduction kernel: stores a double at an offset of 101 ints from
// the start of `data`.
// NOTE(review): with 4-byte ints that offset is 404 bytes (0x194), which is not
// a multiple of 8, so the 8-byte store below is not naturally aligned even when
// `data` itself is.
__global__ void compute_energy(void *data) {
*(double*)((int*)data+101) = 0.0;
}
// Host driver for the minimal reproduction: allocates room for 101 ints plus
// one double, launches compute_energy with a single thread, waits for it to
// finish and releases the buffer. Every CUDA runtime call is wrapped in
// CUDA_SAFE_CALL.
int main(int argc, char *argv[]) {
    void *device_data = 0;
    CUT_DEVICE_INIT(argc, argv);
    CUDA_SAFE_CALL(cudaMalloc(&device_data, 101*sizeof(int)+sizeof(double)));
    compute_energy<<<dim3(1, 1, 1), dim3(1, 1, 1)>>>(device_data);
    // A kernel launch returns no status itself; query it explicitly so that a
    // rejected launch configuration is reported at the launch, not later.
    CUDA_SAFE_CALL(cudaGetLastError());
    // Faults raised while the kernel runs surface at this synchronising call.
    CUDA_SAFE_CALL(cudaThreadSynchronize());
    // Release the device buffer instead of leaking it until process exit.
    CUDA_SAFE_CALL(cudaFree(device_data));
    return 0;
}
Run Code Online (Sandbox Code Playgroud)
现在很容易看出偏移应该是有效的.我尝试运行cuda-memcheck,它说如下:
========= CUDA-MEMCHECK
Using device 0: GeForce GTX 480
Cuda error in file 'Main.cu' in line 13 : unspecified launch failure.
========= Invalid __global__ write of size 8
========= at 0x00000020 in compute_energy
========= by thread (0,0,0) in block (0,0,0)
========= Address 0x200200194 is misaligned
=========
========= ERROR SUMMARY: 1 error
Run Code Online (Sandbox Code Playgroud)
我试着在互联网上搜索“地址未对齐”(address is misaligned)是什么意思,但没有找到解释。这到底是怎么回事?
原始代码里有那么多魔术常量,很难看出问题所在,但更新后的复现用例让问题一目了然。GPU架构要求每次内存访问的地址都按所访问类型的大小对齐,而你的内核中有一处指针访问没有满足这一点:double是64位类型,但你写入的地址并没有落在64位(8字节)边界上——(int*)data+101对应的字节偏移是404,不是8的倍数。这个:
*(double*)((int*)data+100) = 0.0; // byte offset 400 = 8 * 50 (4-byte int): 8-byte aligned, double index 50
Run Code Online (Sandbox Code Playgroud)
或这个:
*(double*)((int*)data+102) = 0.0; // byte offset 408 = 8 * 51 (4-byte int): 8-byte aligned, double index 51
Run Code Online (Sandbox Code Playgroud)
都是合法的.这个:
*(double*)((int*)data+101) = 0.0; // byte offset 404 (4-byte int) is not a multiple of 8: misaligned for a 64-bit double
Run Code Online (Sandbox Code Playgroud)
不是.
归档时间: |
|
查看次数: |
9809 次 |
最近记录: |