键为字符串或字符数组时如何使用Thrust实现按键reduce

fan*_*nhk 0 cuda thrust

输入:

BC
BD
BC
BC
BD
CD

输出:

BC 3
BD 2
CD 1

如果我使用 char 类型作为键,它是可用的。但似乎 Thrust 不支持字符串作为键。

#include <thrust/device_vector.h>
#include <thrust/iterator/constant_iterator.h>
#include <thrust/reduce.h>
#include <string>

int main(void)
{
  std::string data = "aaabbbbbcddeeeeeeeeeff";

  size_t N = data.size();

  thrust::device_vector<char> input(data.begin(), data.end());

  thrust::device_vector<char> output(N);
  thrust::device_vector<int>  lengths(N);

  size_t num_runs =
    thrust::reduce_by_key(input.begin(), input.end(),        
                      thrust::constant_iterator<int>(1), 
                      output.begin(),                    
                      lengths.begin()                    
                      ).first - output.begin();
   return 0;
}
Run Code Online (Sandbox Code Playgroud)

如何使用 Thrust 实现它?

Rob*_*lla 5

向@AngryLettuce 道歉,这里有两种可能的方法:

方法一:

  1. 创建一个结构体来保存键。该结构体为键中的每个字符包含一个 char 成员。
  2. 对键进行排序(sort),使相同的键彼此相邻。看起来您真正想要的只是每种键的出现次数,而不管它出现在序列中的哪个位置。为了能够使用 reduce_by_key,必须先把相同的键聚集在一起;否则,reduce_by_key 会把被其他键隔开的相同键视为不同的键序列。从您给出的输入和期望输出可以明显看出,这不是您想要的结果。
  3. 现在对已排序的键使用 reduce_by_key,统计相同键的个数。

(对于此方法)步骤 2 需要一个用于对键排序的函子,步骤 3 需要一个定义键“相等”含义的函子,供 reduce_by_key 使用。

方法二:

  1. 创建两个单独的 char 类型 device_vector,一个保存每个键的第一个字母,另一个保存每个键的第二个字母。然后,在代码的其余部分中,我们使用 zip_iterator 将这两个向量视为一个统一的“键”向量。

  2. 对 zip 后的键向量进行排序(sort)。在这种情况下,Thrust 知道如何对由基本类型组成的 zip 向量进行排序,不需要单独的排序函子。

  3. 对 zip 后(且已排序)的键向量执行 reduce_by_key。这同样不需要单独的相等函子,Thrust 知道如何判断由基本类型组成的 zip 向量的相等性。

除了不需要定义任何函子之外,第二种方法可能还会更快,因为与第一种方法中的 AoS(结构数组)布局相比,zip_iterator 往往能改善数据访问效率。

这是一个演示这两种方法的工作示例:

$ cat t1004.cu
#include <thrust/device_vector.h>
#include <thrust/sort.h>
#include <thrust/reduce.h>
#include <thrust/iterator/constant_iterator.h>
#include <iostream>

#include <thrust/iterator/zip_iterator.h>

// Two-character key stored as a plain aggregate (one char member per key
// character) so it can be brace-initialized and held in a device_vector.
struct key {
  char k1;
  char k2;
};

// Strict "less than" ordering for keys: lexicographic on (k1, k2).
// Takes its arguments by const reference and is itself const-callable, so it
// works on const keys and on const functor objects; a comparator never needs
// to modify what it compares.
struct sort_functor{
  __host__ __device__ bool operator()(const key &lhs, const key &rhs) const {
    // The first character decides unless it ties; then the second decides.
    if (lhs.k1 != rhs.k1) return lhs.k1 < rhs.k1;
    return lhs.k2 < rhs.k2;}
};

// Equality predicate for keys: true only when both characters match.
struct equal_key{
  __host__ __device__ bool operator()(const key &lhs, const key &rhs) const {
    return (lhs.k1 == rhs.k1) && (lhs.k2 == rhs.k2);}
};

int main(){

  key data[] = {{'B','C'},{'B','D'},{'B','C'},{'B','C'},{'B','D'},{'C','D'}};;
  size_t dsize = sizeof(data)/sizeof(key);


//method 1
  thrust::device_vector<key> keys(data, data+dsize);
  thrust::device_vector<key> keys_out(dsize);
  thrust::device_vector<int> lengths(dsize);
  thrust::sort(keys.begin(), keys.end(), sort_functor());
  int rsize = thrust::reduce_by_key(keys.begin(), keys.end(), thrust::constant_iterator<int>(1), keys_out.begin(), lengths.begin(),equal_key()).first - keys_out.begin();
  std::cout << "Method1:" << std::endl;
  for (int i = 0; i < rsize; i++){
    key temp = keys_out[i];
    int len = lengths[i];
    std::cout << " " << temp.k1 << temp.k2 << " " <<  len << std::endl;}

//method 2

  //get the key data into 2 separate vectors.
  //there are more efficient ways to do this
  //but this is not the crux of your question

  thrust::device_vector<char> k1;
  thrust::device_vector<char> k2;
  for (int i = 0; i < dsize; i++){
    k1.push_back(data[i].k1);
    k2.push_back(data[i].k2);}

  thrust::sort(thrust::make_zip_iterator(thrust::make_tuple(k1.begin(), k2.begin())), thrust::make_zip_iterator(thrust::make_tuple(k1.end(), k2.end())));

  thrust::device_vector<char> k1r(dsize);
  thrust::device_vector<char> k2r(dsize);
  rsize = thrust::reduce_by_key(thrust::make_zip_iterator(thrust::make_tuple(k1.begin(), k2.begin())), thrust::make_zip_iterator(thrust::make_tuple(k1.end(), k2.end())), thrust::constant_iterator<int>(1), thrust::make_zip_iterator(thrust::make_tuple(k1r.begin(), k2r.begin())), lengths.begin()).first - thrust::make_zip_iterator(thrust::make_tuple(k1r.begin(),k2r.begin()));
  std::cout << "Method2:" << std::endl;
  for (int i = 0; i < rsize; i++){
    char c1 = k1r[i];
    char c2 = k2r[i];
    int len = lengths[i];
    std::cout << " " << c1 << c2 << " " <<  len << std::endl;}

  return 0;
}
$ nvcc -o t1004 t1004.cu
$ ./t1004
Method1:
 BC 3
 BD 2
 CD 1
Method2:
 BC 3
 BD 2
 CD 1
$
Run Code Online (Sandbox Code Playgroud)

这是方法 2 的改进版本。您应该能够直接使用字符串/char 数组,并且这个版本也可以很容易地修改,以适应 2 到 10 个字符的键长度。此方法使用跨步范围(strided range)迭代器直接从数据数组中提取键的各个字符:

$ cat t1004.cu
#include <thrust/device_vector.h>
#include <thrust/sort.h>
#include <thrust/reduce.h>
#include <thrust/iterator/constant_iterator.h>
#include <iostream>

#include <thrust/iterator/zip_iterator.h>
#include <thrust/iterator/transform_iterator.h>
#include <thrust/iterator/permutation_iterator.h>

template <typename Iterator>
class strided_range
{
    public:

    typedef typename thrust::iterator_difference<Iterator>::type difference_type;

    struct stride_functor : public thrust::unary_function<difference_type,difference_type>
    {
        difference_type stride;

        stride_functor(difference_type stride)
            : stride(stride) {}

        __host__ __device__
        difference_type operator()(const difference_type& i) const
        {
            return stride * i;
        }
    };

    typedef typename thrust::counting_iterator<difference_type>                   CountingIterator;
    typedef typename thrust::transform_iterator<stride_functor, CountingIterator> TransformIterator;
    typedef typename thrust::permutation_iterator<Iterator,TransformIterator>     PermutationIterator;

    // type of the strided_range iterator
    typedef PermutationIterator iterator;

    // construct strided_range for the range [first,last)
    strided_range(Iterator first, Iterator last, difference_type stride)
        : first(first), last(last), stride(stride) {}

    iterator begin(void) const
    {
        return PermutationIterator(first, TransformIterator(CountingIterator(0), stride_functor(stride)));
    }

    iterator end(void) const
    {
        return begin() + ((last - first) + (stride - 1)) / stride;
    }

    protected:
    Iterator first;
    Iterator last;
    difference_type stride;
};

typedef thrust::device_vector<char>::iterator cIterator;

int main(){

//method 2

  //get the key data into separate vectors, one per character in key.
#define KEYLEN 2
  const char data[] = "BCBDBCBCBDCD";
  size_t dsize = sizeof(data)/sizeof(char);
  size_t numkeys = dsize/KEYLEN;
  thrust::device_vector<char> keys(data, data+dsize);
  strided_range<cIterator>  *str_k[KEYLEN];
  for (int i = 0; i < KEYLEN; i++)
    str_k[i] = new strided_range<cIterator>(keys.begin()+i, keys.end(), KEYLEN);

//modify this line also if KEYLEN changes (max 10)
  auto my_z = thrust::make_zip_iterator(thrust::make_tuple((*str_k[0]).begin(), (*str_k[1]).begin()));

  thrust::sort(my_z, my_z+numkeys);

  thrust::device_vector<char> kr[KEYLEN];
  for (int i = 0; i < KEYLEN; i++)
    kr[i].resize(numkeys);

//modify this line also if KEYLEN changes (max 10)
  auto my_zr = thrust::make_zip_iterator(thrust::make_tuple(kr[0].begin(), kr[1].begin()));

  thrust::device_vector<int> lengths(numkeys);

  size_t rsize = thrust::reduce_by_key(my_z, my_z + numkeys, thrust::constant_iterator<int>(1), my_zr, lengths.begin()).first - my_zr;
  std::cout << "Method2:" << std::endl;

  for (int i = 0; i < rsize; i++){
    std::cout << " ";
    for (int j = 0; j < KEYLEN; j++){
      char c = kr[j][i];
      std::cout << c; }
    int len = lengths[i];
    std::cout <<" " <<  len << std::endl;}

  return 0;
}
$ nvcc -std=c++11 t1004.cu -o t1004
$ ./t1004
Method2:
 BC 3
 BD 2
 CD 1
$
Run Code Online (Sandbox Code Playgroud)