我正在尝试使用CUDA Thrust来解决问题.
我有一个包含3元素的主机数组.是否有可能使用Thrust创建一个384元素的设备数组,其中3主机数组中的元素重复128次数(128 x 3 = 384)?
一般来说,从一个3元素数组开始,我如何使用Thrust生成一个大小的设备数组X,其中X = Y x 3,即Y重复次数是多少?
一种可能的方法:
此代码是对示例的跨步范围示例的一个微不足道的修改.您可以将REPSdefine 更改为128以查看完整扩展到384个输出元素:
#include <thrust/iterator/counting_iterator.h>
#include <thrust/iterator/transform_iterator.h>
#include <thrust/iterator/permutation_iterator.h>
#include <thrust/functional.h>
#include <thrust/fill.h>
#include <thrust/device_vector.h>
#include <thrust/host_vector.h>
// for printing
#include <thrust/copy.h>
#include <ostream>
#define STRIDE 3
#define REPS 15 // change to 128 if you like
#define DSIZE (STRIDE*REPS)
// this example illustrates how to make strided access to a range of values
// examples:
// strided_range([0, 1, 2, 3, 4, 5, 6], 1) -> [0, 1, 2, 3, 4, 5, 6]
// strided_range([0, 1, 2, 3, 4, 5, 6], 2) -> [0, 2, 4, 6]
// strided_range([0, 1, 2, 3, 4, 5, 6], 3) -> [0, 3, 6]
// ...
template <typename Iterator>
class strided_range
{
public:
typedef typename thrust::iterator_difference<Iterator>::type difference_type;
struct stride_functor : public thrust::unary_function<difference_type,difference_type>
{
difference_type stride;
stride_functor(difference_type stride)
: stride(stride) {}
__host__ __device__
difference_type operator()(const difference_type& i) const
{
return stride * i;
}
};
typedef typename thrust::counting_iterator<difference_type> CountingIterator;
typedef typename thrust::transform_iterator<stride_functor, CountingIterator> TransformIterator;
typedef typename thrust::permutation_iterator<Iterator,TransformIterator> PermutationIterator;
// type of the strided_range iterator
typedef PermutationIterator iterator;
// construct strided_range for the range [first,last)
strided_range(Iterator first, Iterator last, difference_type stride)
: first(first), last(last), stride(stride) {}
iterator begin(void) const
{
return PermutationIterator(first, TransformIterator(CountingIterator(0), stride_functor(stride)));
}
iterator end(void) const
{
return begin() + ((last - first) + (stride - 1)) / stride;
}
protected:
Iterator first;
Iterator last;
difference_type stride;
};
int main(void)
{
thrust::host_vector<int> h_data(STRIDE);
h_data[0] = 1;
h_data[1] = 2;
h_data[2] = 3;
thrust::device_vector<int> data(DSIZE);
typedef thrust::device_vector<int>::iterator Iterator;
strided_range<Iterator> pos1(data.begin(), data.end(), STRIDE);
strided_range<Iterator> pos2(data.begin()+1, data.end(), STRIDE);
strided_range<Iterator> pos3(data.begin()+2, data.end(), STRIDE);
thrust::fill(pos1.begin(), pos1.end(), h_data[0]);
thrust::fill(pos2.begin(), pos2.end(), h_data[1]);
thrust::fill(pos3.begin(), pos3.end(), h_data[2]);
// print the generated data
std::cout << "data: ";
thrust::copy(data.begin(), data.end(), std::ostream_iterator<int>(std::cout, " ")); std::cout << std::endl;
return 0;
}
Run Code Online (Sandbox Code Playgroud)
| 归档时间: |
|
| 查看次数: |
1613 次 |
| 最近记录: |