使用 Thrust 实现带步幅的计数迭代器(counting iterator)

5 cuda thrust

我正在寻找一种方法,使用 thrust::counting_iterator 来并行化以下 for 循环:

// Sequential loop to be parallelized: for positive N and M, `stride` takes
// the values 0, M, 2*M, ..., (N-1)*M.
for (int stride = 0 ; stride < N * M ; stride+=M) // N iterations
{
    // Body of the loop
}
Run Code Online (Sandbox Code Playgroud)

以下是代码的外观:

struct functor ()
{
   __host__ __device__ void operator() (const int i)
   {
      // Body of the loop
   }
}

// Half-open range [0, N * M): for_each applies the functor to every integer
// in it, i.e. it advances in steps of 1 rather than M.
thrust::counting_iterator<int> first(0);
thrust::counting_iterator<int> last = first + N * M;
thrust::for_each (first , last , functor());
Run Code Online (Sandbox Code Playgroud)

我知道 counting_iterator 每次只将迭代器递增 1,那么有没有办法让它每次递增 M?

Rob*_*lla 4

为什么不直接在函子中将变量 i 乘以 M 呢?

如果M在编译时已知,它可能是:

// Variant for an M that is visible at compile time: the functor receives a
// plain counter and scales it by M itself.
struct functor
{
   __host__ __device__
   void operator() (const int counter)
   {
      // i plays the role of `stride` in the original sequential loop.
      int i = M * counter;
      // Body of the loop
   }
};

// Only N counter values are visited, [0, N); the functor performs the
// multiplication by M.
thrust::counting_iterator<int> first(0);
thrust::counting_iterator<int> last = first + N;
thrust::for_each (first , last , functor());
Run Code Online (Sandbox Code Playgroud)

如果M仅在运行时知道,我们可以将其作为初始化参数传递给仿函数:

// Variant for an M known only at run time: the multiplier is stored in the
// functor when it is constructed and applied to each counter value.
struct functor
{
   int my_M;   // multiplier applied to every counter value

   // m: the stride multiplier. (Named `m` rather than `_M`: identifiers that
   // start with an underscore followed by an uppercase letter are reserved.)
   functor(int m) : my_M(m) {}

   // my_i: the counter value for the current element.
   __host__ __device__ void operator() (const int my_i)
   {
      // i plays the role of `stride` in the original sequential loop.
      int i = my_i *my_M;
      // Body of the loop
   }
};

// Visit the N counter values [0, N); M is passed to the functor's
// constructor so the scaling happens inside the functor.
thrust::counting_iterator<int> first(0);
thrust::counting_iterator<int> last = first + N;
thrust::for_each (first , last , functor(M));
Run Code Online (Sandbox Code Playgroud)

您还可以将计数迭代器包装在变换迭代器中,该迭代器采用计数迭代器并将其乘以 M:

// Functor for the transform-iterator approach: no scaling is done here, the
// argument is used as-is by the loop body.
struct functor {
   __host__ __device__
   void operator() (const int i) {
      // Body of the loop
   }
};

// Brings the placeholder _1 into scope; `_1 * M` is a placeholder expression
// that multiplies its argument by M.
using namespace thrust::placeholders;
thrust::counting_iterator<int> it1(0);
thrust::counting_iterator<int> it2 = it1 + N;
// Both ends of the range are wrapped with the same transformation, so the
// functor is called with counter * M for each counter in [0, N).
thrust::for_each (thrust::make_transform_iterator(it1, _1 * M) ,
                  thrust::make_transform_iterator(it2, _1 * M) ,
                  functor());
Run Code Online (Sandbox Code Playgroud)

最后一个示例使用了 Thrust 占位符表达式,不过也可以用一个额外的简单函子等效地实现,该函子返回其输入参数乘以 M 的结果。

这是一个完整的示例,显示了所有 3 种方法:

$ cat t492.cu
#include <stdio.h>
#include <thrust/transform.h>
#include <thrust/for_each.h>
#include <thrust/execution_policy.h>
#include <thrust/iterator/counting_iterator.h>
#include <thrust/iterator/transform_iterator.h>
#include <thrust/host_vector.h>
#include <thrust/functional.h>
#define N 5
#define M 4
using namespace thrust::placeholders;

// Prints the value it receives, unmodified.
struct my_functor_1
{
  __host__ __device__
  void operator() (const int value)
  {
    printf("functor 1 value: %d\n", value);
  }
};

// Scales the received counter by the compile-time constant M and prints the
// product.
struct my_functor_2
{
   __host__ __device__
   void operator() (const int counter)
   {
    printf("functor 2 value: %d\n", counter * M);
   }
};

// Scales the received counter by a multiplier supplied at construction time
// and prints the product.
struct my_functor_3
{
   int my_M;   // multiplier applied to every counter value

   // m: the stride multiplier. (Named `m` rather than `_M`: identifiers that
   // start with an underscore followed by an uppercase letter are reserved.)
   my_functor_3(int m) : my_M(m) {}

   // my_i: the counter value for the current element.
   __host__ __device__ void operator() (const int my_i)
   {
      int i = my_i *my_M;
      printf("functor 3 value: %d\n", i);
   }
};


int main(){
  thrust::counting_iterator<int> it1(0);
  thrust::counting_iterator<int> it2 = it1 + N;
  thrust::for_each(thrust::host, it1, it2, my_functor_1());
  thrust::for_each(thrust::host, it1, it2, my_functor_2());
  thrust::for_each(thrust::host, it1, it2, my_functor_3(M));
  thrust::for_each(thrust::host, thrust::make_transform_iterator(it1, _1 * M), thrust::make_transform_iterator(it2, _1 * M), my_functor_1());
  return 0;
}


$ nvcc -arch=sm_20 -o t492 t492.cu
$ ./t492
functor 1 value: 0
functor 1 value: 1
functor 1 value: 2
functor 1 value: 3
functor 1 value: 4
functor 2 value: 0
functor 2 value: 4
functor 2 value: 8
functor 2 value: 12
functor 2 value: 16
functor 3 value: 0
functor 3 value: 4
functor 3 value: 8
functor 3 value: 12
functor 3 value: 16
functor 1 value: 0
functor 1 value: 4
functor 1 value: 8
functor 1 value: 12
functor 1 value: 16
$
Run Code Online (Sandbox Code Playgroud)