我刚刚发现 libcu++ 库并尝试使用这些cuda::atomic变量。我编写了以下程序,但它给了我意想不到的结果:
#include <atomic>
#include <cuda/atomic>
#include <stdio.h>
#define gpuErrchk(ans) { gpuAssert((ans), __FILE__, __LINE__); }
inline void gpuAssert(cudaError_t code, const char *file, int line, bool abort=true)
{
if (code != cudaSuccess)
{
fprintf(stderr,"GPUassert: %s %s %d\n", cudaGetErrorString(code), file, line);
if (abort) exit(code);
}
}
__global__ void atomic_test()
{
cuda::atomic<int, cuda::thread_scope_block> x{0};
x.fetch_add(1, cuda::memory_order_seq_cst);
__syncthreads();
int y = x.load(cuda::memory_order_acquire);
printf("(%d %d) - Value of x is %d\n", blockIdx.x, threadIdx.x, y);
}
int main()
{
atomic_test<<<2, 32>>>();
gpuErrchk( cudaDeviceSynchronize() …Run Code Online (Sandbox Code Playgroud)