我为Conway的生活游戏编写了这个CUDA内核:
__global__ void gameOfLife(float* returnBuffer, int width, int height) {
unsigned int x = blockIdx.x*blockDim.x + threadIdx.x;
unsigned int y = blockIdx.y*blockDim.y + threadIdx.y;
float p = tex2D(inputTex, x, y);
float neighbors = 0;
neighbors += tex2D(inputTex, x+1, y);
neighbors += tex2D(inputTex, x-1, y);
neighbors += tex2D(inputTex, x, y+1);
neighbors += tex2D(inputTex, x, y-1);
neighbors += tex2D(inputTex, x+1, y+1);
neighbors += tex2D(inputTex, x-1, y-1);
neighbors += tex2D(inputTex, x-1, y+1);
neighbors += tex2D(inputTex, x+1, y-1);
__syncthreads();
float final = 0;
if(neighbors …
Run Code Online (Sandbox Code Playgroud)