<<< >>> vscode中的cuda

Bug*_*ler 3 c++ cuda visual-studio-code vscode-settings

vscode-cpptools 是否有任何方法可以抑制 "<<< >>>" 报错?

我在 settings.json 中将 "*.cu" 与 "cpp" 相关联:

// use normal c++ syntax highlighting for CUDA files
  "files.associations": {"*.cu": "cpp"},
Run Code Online (Sandbox Code Playgroud)

并且工作正常,除了一个问题, kernel execution configuration parameters surrounded by <<< and >>>被误认为是错误expected an expression

dim3 dimGrid(2, 2, 1);
dim3 dimBlock(width / 2, width / 2, 1);
MatrixMulKernel<<<dimGrid, dimBlock>>>(d_M, d_N, d_P, width);
Run Code Online (Sandbox Code Playgroud)

任何建议

Bug*_*ler 5

搜寻了几个小时,找不到完美的解决方案,但找到了一些解决方法。

我在这里总结:

  • 通过编辑 settings.json,对 CUDA 文件使用普通的 C++ 语法高亮
  • 在程序中包含必要的 CUDA 头文件
  • 包含一个虚拟头文件,以绕过 IntelliSense 的解析问题

下面是一个具体的例子:

  • setting.json
"files.associations": {
    "*.cu": "cpp",
    "*.cuh": "cpp"
  }
Run Code Online (Sandbox Code Playgroud)
  • cudaDmy.cuh
#pragma once
// IntelliSense cannot parse the CUDA <<< >>> kernel-launch syntax, so under
// the __INTELLISENSE__ symbol (defined only by the VS/VSCode parser, never by
// nvcc) the launch-configuration macros expand to nothing, and __syncthreads
// gets a plain declaration so the parser stops flagging it as undefined.
#ifdef __INTELLISENSE__
void __syncthreads();  // workaround __syncthreads warning
#define KERNEL_ARG2(grid, block)
#define KERNEL_ARG3(grid, block, sh_mem)
#define KERNEL_ARG4(grid, block, sh_mem, stream)
#else
// Real compilation (nvcc): expand to the actual execution configuration.
// Bug fix: KERNEL_ARG4 was broken across two lines without a backslash
// continuation, which is invalid preprocessor syntax; it is one line now.
#define KERNEL_ARG2(grid, block) <<< grid, block >>>
#define KERNEL_ARG3(grid, block, sh_mem) <<< grid, block, sh_mem >>>
#define KERNEL_ARG4(grid, block, sh_mem, stream) <<< grid, block, sh_mem, stream >>>
#endif
Run Code Online (Sandbox Code Playgroud)
  • 矩阵文库
#include <stdio.h>
#include <math.h>
#include <time.h>
#include <cuda.h>
#include "cuda_runtime.h"
#include "device_launch_parameters.h"
#include <device_functions.h>
#include <cuda_runtime_api.h>
#include "cudaDmy.cuh"

// Naive square matrix multiply: each thread computes one element of P = M * N.
// Expects a 2-D launch whose gridDim*blockDim covers a width x width matrix;
// threads mapped outside the matrix exit via the bounds check.
__global__ void MatrixMulKernel(float *M, float *N, float *P, int width)
{
    const int row = blockIdx.y * blockDim.y + threadIdx.y;
    const int col = blockIdx.x * blockDim.x + threadIdx.x;
    if (row >= width || col >= width)
        return;  // this thread falls outside the matrix

    float acc = 0.0f;
    for (int k = 0; k < width; ++k)
        acc += M[row * width + k] * N[k * width + col];
    P[row * width + col] = acc;
}

// Host wrapper: multiplies two width x width matrices on the GPU, P = M * N.
// M, N, P are host pointers to width*width floats; P receives the result.
// The launch uses a fixed 2x2 grid with (width/2, width/2) blocks, so width
// must be even and width/2 must not exceed the device's block-dim limit.
// NOTE(review): CUDA API return codes are not checked, matching the original
// example's style; production code should wrap every call in an error check.
void MatMul(float *M, float *N, float *P, int width)
{
    float *d_M;
    float *d_N;
    float *d_P;
    int size = width * width * sizeof(float);

    cudaMalloc((void **)&d_M, size);
    cudaMemcpy(d_M, M, size, cudaMemcpyHostToDevice);

    cudaMalloc((void **)&d_N, size);
    cudaMemcpy(d_N, N, size, cudaMemcpyHostToDevice);

    cudaMalloc((void **)&d_P, size);

    dim3 dimGrid(2, 2, 1);
    dim3 dimBlock(width / 2, width / 2, 1);
    // KERNEL_ARG2 expands to <<<dimGrid, dimBlock>>> under nvcc (and to
    // nothing under IntelliSense, which cannot parse the launch syntax).
    // Bug fix: the second kernel argument was d_M, so the example computed
    // M*M instead of M*N; it now passes d_N.
    MatrixMulKernel KERNEL_ARG2(dimGrid, dimBlock) (d_M, d_N, d_P, width);

    // Blocking device-to-host copy; also serves as the synchronization point
    // that waits for the kernel to finish.
    cudaMemcpy(P, d_P, size, cudaMemcpyDeviceToHost);

    cudaFree(d_M);
    cudaFree(d_N);
    cudaFree(d_P);
}

// Demo driver: fills two 10x10 matrices (elem = 100 floats each) with ramp
// values, multiplies them on the GPU via MatMul, prints the elapsed wall time
// and the first few entries of the result.
int main()
{
    const int elem = 100;  // total elements per matrix; width is sqrt(elem) = 10
    float *M = new float[elem];
    float *N = new float[elem];
    float *P = new float[elem];

    for (int i = 0; i < elem; ++i)
        M[i] = i;

    for (int i = 0; i < elem; ++i)
        N[i] = i + elem;

    time_t t1 = time(NULL);
    MatMul(M, N, P, sqrt(elem));  // width = 10 (double truncated to int)
    time_t t2 = time(NULL);
    // time() has one-second resolution, so this is only a coarse measurement.
    double seconds = difftime(t2, t1);
    printf("%.3f seconds total time\n", seconds);

    // Bug fix: the original loop bound was elem / 1000000, which is 0 for
    // elem = 100, so no results were ever printed. Print the first few.
    for (int i = 0; i < elem && i < 10; ++i)
        printf("%.1f\t", P[i]);
    printf("\n");

    delete[] M;
    delete[] N;
    delete[] P;
    return 0;
}
Run Code Online (Sandbox Code Playgroud)

让我们用NVCC编译它

nvcc matrixMul.cu -Xcudafe "--diag_suppress=unrecognized_pragma" -o runcuda
Run Code Online (Sandbox Code Playgroud)

有用的链接: