我正在尝试使用原子指令在CUDA中实现一个关键部分,但我遇到了一些麻烦.我创建了测试程序来显示问题:
#include <cuda_runtime.h>
#include <cutil_inline.h>
#include <stdio.h>
__global__ void k_testLocking(unsigned int* locks, int n) {
int id = threadIdx.x % n;
while (atomicExch(&(locks[id]), 1u) != 0u) {} //lock
//critical section would go here
atomicExch(&(locks[id]),0u); //unlock
}
int main(int argc, char** argv) {
//initialize the locks array on the GPU to (0...0)
unsigned int* locks;
unsigned int zeros[10]; for (int i = 0; i < 10; i++) {zeros[i] = 0u;}
cutilSafeCall(cudaMalloc((void**)&locks, sizeof(unsigned int)*10));
cutilSafeCall(cudaMemcpy(locks, zeros, sizeof(unsigned int)*10, cudaMemcpyHostToDevice));
//Run …Run Code Online (Sandbox Code Playgroud)