我目前正在开发一个必须实现 2D-FFT(用于互相关)的程序。我使用 CUDA 进行了 1D FFT,这给了我正确的结果,我现在正在尝试实现 2D 版本。由于网上的示例和文档很少,我发现很难找出错误是什么。
到目前为止,我只使用 cuFFT 手册。
无论如何,我创建了两个 5x5 数组并用 1 填充它们。我将它们复制到 GPU 内存上并进行前向 FFT,将它们相乘,然后对结果进行 ifft。这给了我一个值为 650 的 5x5 数组。我希望在 5x5 数组中仅一个插槽中获得值为 25 的 DC 信号。相反,我在整个数组中得到 650。
此外,在将信号复制到 GPU 内存后,我不允许打印出信号的值。写作
cout << d_signal[1].x << endl;
Run Code Online (Sandbox Code Playgroud)
给我一个访问冲突。我在其他 cuda 程序中做了同样的事情,这不是一个问题。它与复杂变量的工作方式有关,还是人为错误?
如果有人指出出了什么问题,我将不胜感激。这是代码
#include "cuda_runtime.h"
#include "device_launch_parameters.h"
#include <helper_functions.h>
#include <helper_cuda.h>
#include <ctime>
#include <time.h>
#include <stdio.h>
#include <iostream>
#include <math.h>
#include <cufft.h>
#include <fstream>
using namespace std;
typedef float2 Complex;
__global__ void ComplexMUL(Complex *a, Complex *b)
{
int i = threadIdx.x;
a[i].x = a[i].x * b[i].x - a[i].y*b[i].y;
a[i].y = a[i].x * b[i].y + a[i].y*b[i].x;
}
int main()
{
int N = 5;
int SIZE = N*N;
Complex *fg = new Complex[SIZE];
for (int i = 0; i < SIZE; i++){
fg[i].x = 1;
fg[i].y = 0;
}
Complex *fig = new Complex[SIZE];
for (int i = 0; i < SIZE; i++){
fig[i].x = 1; //
fig[i].y = 0;
}
for (int i = 0; i < 24; i=i+5)
{
cout << fg[i].x << " " << fg[i + 1].x << " " << fg[i + 2].x << " " << fg[i + 3].x << " " << fg[i + 4].x << endl;
}
cout << "----------------" << endl;
for (int i = 0; i < 24; i = i + 5)
{
cout << fig[i].x << " " << fig[i + 1].x << " " << fig[i + 2].x << " " << fig[i + 3].x << " " << fig[i + 4].x << endl;
}
cout << "----------------" << endl;
int mem_size = sizeof(Complex)* SIZE;
cufftComplex *d_signal;
checkCudaErrors(cudaMalloc((void **) &d_signal, mem_size));
checkCudaErrors(cudaMemcpy(d_signal, fg, mem_size, cudaMemcpyHostToDevice));
cufftComplex *d_filter_kernel;
checkCudaErrors(cudaMalloc((void **)&d_filter_kernel, mem_size));
checkCudaErrors(cudaMemcpy(d_filter_kernel, fig, mem_size, cudaMemcpyHostToDevice));
// cout << d_signal[1].x << endl;
// CUFFT plan
cufftHandle plan;
cufftPlan2d(&plan, N, N, CUFFT_C2C);
// Transform signal and filter
printf("Transforming signal cufftExecR2C\n");
cufftExecC2C(plan, (cufftComplex *)d_signal, (cufftComplex *)d_signal, CUFFT_FORWARD);
cufftExecC2C(plan, (cufftComplex *)d_filter_kernel, (cufftComplex *)d_filter_kernel, CUFFT_FORWARD);
printf("Launching Complex multiplication<<< >>>\n");
ComplexMUL <<< 32, 256 >> >(d_signal, d_filter_kernel);
// Transform signal back
printf("Transforming signal back cufftExecC2C\n");
cufftExecC2C(plan, (cufftComplex *)d_signal, (cufftComplex *)d_signal, CUFFT_INVERSE);
Complex *result = new Complex[SIZE];
cudaMemcpy(result, d_signal, sizeof(Complex)*SIZE, cudaMemcpyDeviceToHost);
for (int i = 0; i < SIZE; i=i+5)
{
cout << result[i].x << " " << result[i + 1].x << " " << result[i + 2].x << " " << result[i + 3].x << " " << result[i + 4].x << endl;
}
delete result, fg, fig;
cufftDestroy(plan);
//cufftDestroy(plan2);
cudaFree(d_signal);
cudaFree(d_filter_kernel);
}
Run Code Online (Sandbox Code Playgroud)
上面的代码给出了以下终端输出:
1 1 1 1 1
1 1 1 1 1
1 1 1 1 1
1 1 1 1 1
1 1 1 1 1
----------------
1 1 1 1 1
1 1 1 1 1
1 1 1 1 1
1 1 1 1 1
1 1 1 1 1
----------------
Transforming signal cufftExecR2C
Launching Complex multiplication<<< >>>
Transforming signal back cufftExecC2C
625 625 625 625 625
625 625 625 625 625
625 625 625 625 625
625 625 625 625 625
625 625 625 625 625
Run Code Online (Sandbox Code Playgroud)
| 归档时间: |
|
| 查看次数: |
10599 次 |
| 最近记录: |