Bob*_*ong 1 c textures cuda coordinates
我正在查看一个用 CUDA 实现的 DCT:http://www.cse.nd.edu/courses/cse60881/www/source_code/dct8x8/dct8x8_kernel1.cu 我有疑问的部分在这里:
// Shared-memory scratch buffer that receives one image block, one element per thread.
// BLOCK_SIZE2 is defined outside this excerpt (presumably BLOCK_SIZE * BLOCK_SIZE -- confirm).
__shared__ float CurBlockLocal1[BLOCK_SIZE2];
// Kernel excerpt: each thread fetches one texel from TexSrc and stores it into
// CurBlockLocal1. The remainder of the kernel (including its closing brace) is
// not part of this excerpt.
//   Dst, ImgWidth  - not used in the visible portion of the kernel
//   OffsetXBlocks  - added to blockIdx.x to obtain the horizontal block index
//   OffsetYBlocks  - added to blockIdx.y to obtain the vertical block index
__global__ void CUDAkernel1DCT(float *Dst, int ImgWidth, int OffsetXBlocks, int OffsetYBlocks)
{
// Block index
const int bx = blockIdx.x + OffsetXBlocks;
const int by = blockIdx.y + OffsetYBlocks;
// Thread index (current coefficient)
const int tx = threadIdx.x;
const int ty = threadIdx.y;
// Texture coordinates
// `b << BLOCK_SIZE_LOG2` multiplies the block index by 2^BLOCK_SIZE_LOG2, so
// (block index scaled) + (thread index) is this thread's pixel position.
// NOTE(review): the +0.5f presumably targets the texel centre, which assumes
// TexSrc is sampled with unnormalized coordinates -- confirm against the
// TexSrc declaration (not visible here).
const float tex_x = (float)( (bx << BLOCK_SIZE_LOG2) + tx ) + 0.5f;
const float tex_y = (float)( (by << BLOCK_SIZE_LOG2) + ty ) + 0.5f;
// Copy the current image pixel into the shared block at flat index
// ty * 2^BLOCK_SIZE_LOG2 + tx.
CurBlockLocal1[ (ty << BLOCK_SIZE_LOG2) + tx ] = tex2D(TexSrc, tex_x, tex_y);
// Block-wide barrier: no thread continues until every thread in the block
// has stored its element, so the whole block is copied before it is read.
__syncthreads();
Run Code Online (Sandbox Code Playgroud)
其中块大小(BLOCK_SIZE)为 8,因此 BLOCK_SIZE_LOG2 为 3。
为什么要这样定义纹理坐标?为什么我们需要使用纹理坐标?CUDA 中的“<<”是什么?
以相反的顺序回答您的问题:
a << b 等价于 a * 2^b,其中 a 和 b 都是正整数。因此,您所询问的代码基本上是“乘以 2 的整数次幂”的简写。例如,当 BLOCK_SIZE_LOG2 为 3 时,bx << BLOCK_SIZE_LOG2 就等于 bx * 8,即 bx * BLOCK_SIZE。您询问的代码也可以写成:
// Shared-memory scratch buffer that receives one image block, one element per thread.
// BLOCK_SIZE2 is defined outside this excerpt (presumably BLOCK_SIZE * BLOCK_SIZE -- confirm).
__shared__ float CurBlockLocal1[BLOCK_SIZE2];
// Same kernel excerpt as in the question, with the shifts by BLOCK_SIZE_LOG2
// written as multiplications by BLOCK_SIZE. The `......` line below stands for
// the part of the kernel body that is omitted from this excerpt.
//   Dst, ImgWidth  - not used in the visible portion of the kernel
//   OffsetXBlocks  - added to blockIdx.x to obtain the horizontal block index
//   OffsetYBlocks  - added to blockIdx.y to obtain the vertical block index
__global__ void CUDAkernel1DCT(float *Dst, int ImgWidth, int OffsetXBlocks, int OffsetYBlocks)
{
// Block index
const int bx = blockIdx.x + OffsetXBlocks;
const int by = blockIdx.y + OffsetYBlocks;
// Thread index (current coefficient)
const int tx = threadIdx.x;
const int ty = threadIdx.y;
// Texture coordinates
// (block index * BLOCK_SIZE) + (thread index) is this thread's pixel position.
// NOTE(review): the +0.5f presumably targets the texel centre, which assumes
// TexSrc is sampled with unnormalized coordinates -- confirm against the
// TexSrc declaration (not visible here).
const float tex_x = (float)( (bx * BLOCK_SIZE) + tx ) + 0.5f;
const float tex_y = (float)( (by * BLOCK_SIZE) + ty ) + 0.5f;
// Copy the current image pixel into the shared block at flat index
// ty * BLOCK_SIZE + tx.
CurBlockLocal1[ (ty * BLOCK_SIZE) + tx ] = tex2D(TexSrc, tex_x, tex_y);
......
}
Run Code Online (Sandbox Code Playgroud)
| 归档时间: |
|
| 查看次数: |
1206 次 |
| 最近记录: |