Ily*_*iev 2 c++ arrays algorithm cuda graph
语境
我正在尝试使用 CUDA 实现 Boruvka MST 算法,但根本不需要了解该算法来帮助我。
问题
那么,让我描述一下问题:我有一个图,以边列表格式存储(边数组,每条边用 2 个相邻的顶点 ID 及其权重表示)。
但是,(这非常重要!)为了优化对设备内存的访问,我不是将边存储为单个结构数组,而是存储为三个单独的数组:
要访问单边,只需使用该数组的相同索引进行迭代即可:
现在,当我描述了数据格式后,问题就来了
我想从这三个数组中删除元素(边缘),条件如下:
1)如果 src_id[i] == dst_id[i], 则移除第 i 条边
2)如果 src_id[i] != dst_id[i],但存在另一条边 j 具有相同的 src_id[j] 和 dst_id[j],但权重[j]较小,则删除第 i 条边
换句话说,我想:
第一个很简单:我可以使用 thrust::remove_if,或者按照此处所述通过扫描(scan)从数组中并行删除元素,以删除两端顶点 id 相同的边。(我已经通过扫描实现了第二种变体)。
但我未能实现第二部分,即删除重复的边缘。我有一个想法,但不确定这种方法是否有效。让我描述一下。
首先,我们将按以下方式重新排序(或排序)这三个数组:
当所有边都以这种方式排序时,删除重复的非最小边相对容易:
问题*
但问题是我不知道如何高效地对这三个数组进行排序。(也许我可以把数据转换成单个结构体数组,然后对其使用 thrust::sort,但这似乎会非常慢,还不如根本不删除重复的边)
或者也许有人可以建议更好的方法来删除重复的边缘,而无需以这种方式对它们进行排序。
感谢您阅读本文,如有任何建议,我们将不胜感激!
您可以使用 thrust::zip_iterator,在一次 thrust::sort 调用中轻松对多个向量进行排序。
主要思想是:
auto z = thrust::make_zip_iterator(thrust::make_tuple(d_src_ids.begin(),d_dst_ids.begin(), d_weights.begin()));
thrust::sort(z,z+N);
Run Code Online (Sandbox Code Playgroud)
这将首先按第一个向量对三个向量进行排序,然后按第二个向量,然后按第三个向量排序。
以下代码展示了如何在完整的示例中使用它。它使用自定义函子(复制自 thrust::detail)在单次调用中执行 remove_if 步骤,而无需存储中间结果。
#include <thrust/sort.h>
#include <thrust/iterator/zip_iterator.h>
#include <iostream>
#include <thrust/copy.h>
#include <thrust/device_vector.h>
#include <thrust/remove.h>
#define PRINTER(name) print(#name, (name))
// Writes `name`, then ":" and a tab, then every element of `v` each followed
// by a tab, and finally a newline (flushing std::cout).
template <template <typename...> class V, typename T, typename ...Args>
void print(const char* name, const V<T,Args...> & v)
{
    std::cout << name << ":\t";
    // Stream the elements through an output iterator bound to std::cout.
    std::ostream_iterator<T> sink(std::cout, "\t");
    thrust::copy(v.begin(), v.end(), sink);
    std::cout << std::endl;
}
// copied from https://github.com/thrust/thrust/blob/master/thrust/detail/range/head_flags.h
#include <thrust/iterator/transform_iterator.h>
#include <thrust/iterator/zip_iterator.h>
#include <thrust/iterator/counting_iterator.h>
#include <thrust/tuple.h>
#include <thrust/functional.h>
// Lazily evaluated range of "head flags" over the input range [first, last).
//
// Dereferencing position i of this range yields a ValueType that is true when
// i == 0, or when binary_pred(element i, element i - 1) is false; otherwise it
// is false. With the default predicate (thrust::equal_to) a flag is therefore
// set on the first element and on every element that differs from its
// predecessor.
//
// The class stores only the two iterators m_begin / m_end; flags are computed
// by head_flag_functor each time the iterator is dereferenced.
//
// Template parameters:
//   RandomAccessIterator - iterator type of the input range
//   BinaryPredicate      - equality-style predicate applied to (current, previous)
//   ValueType            - type of each produced flag (bool by default)
//   IndexType            - type used for the running index and the range length
template<typename RandomAccessIterator,
typename BinaryPredicate = thrust::equal_to<typename thrust::iterator_value<RandomAccessIterator>::type>,
typename ValueType = bool,
typename IndexType = typename thrust::iterator_difference<RandomAccessIterator>::type>
class head_flags
{
// XXX WAR cudafe issue
//private:
public:
// Functor applied to each zipped tuple (index, current element, previous element).
struct head_flag_functor
{
BinaryPredicate binary_pred; // this must be the first member for performance reasons
// Length of the input range as passed to the constructor; operator() below does not read it.
IndexType n;
typedef ValueType result_type;
// Builds the functor with a default-constructed predicate.
__host__ __device__
head_flag_functor(IndexType n)
: binary_pred(), n(n)
{}
// Builds the functor with a caller-supplied predicate.
__host__ __device__
head_flag_functor(IndexType n, BinaryPredicate binary_pred)
: binary_pred(binary_pred), n(n)
{}
// Returns the flag for one position.
// t is (index, current element, previous element), as zipped by the
// head_flags constructors.
template<typename Tuple>
__host__ __device__ __thrust_forceinline__
result_type operator()(const Tuple &t)
{
const IndexType i = thrust::get<0>(t);
// note that || short-circuits: when i == 0 the predicate is not evaluated,
// so thrust::get<2>(t) (the "previous element" slot, which comes from
// first - 1) is not read at the boundary
return (i == 0 || !binary_pred(thrust::get<1>(t), thrust::get<2>(t)));
}
};
typedef thrust::counting_iterator<IndexType> counting_iterator;
public:
// Iterator over the flags: a transform_iterator applying head_flag_functor to
// a zip of (counting index, input iterator, input iterator shifted back by one).
typedef thrust::transform_iterator<
head_flag_functor,
thrust::zip_iterator<thrust::tuple<counting_iterator,RandomAccessIterator,RandomAccessIterator> >
> iterator;
// Flags for [first, last) using a default-constructed BinaryPredicate.
// m_end is initialised from m_begin, which relies on m_begin being declared
// before m_end in the class.
__host__ __device__
head_flags(RandomAccessIterator first, RandomAccessIterator last)
: m_begin(thrust::make_transform_iterator(thrust::make_zip_iterator(thrust::make_tuple(thrust::counting_iterator<IndexType>(0), first, first - 1)),
head_flag_functor(last - first))),
m_end(m_begin + (last - first))
{}
// Flags for [first, last) using the supplied binary_pred to compare each
// element with its predecessor.
__host__ __device__
head_flags(RandomAccessIterator first, RandomAccessIterator last, BinaryPredicate binary_pred)
: m_begin(thrust::make_transform_iterator(thrust::make_zip_iterator(thrust::make_tuple(thrust::counting_iterator<IndexType>(0), first, first - 1)),
head_flag_functor(last - first, binary_pred))),
m_end(m_begin + (last - first))
{}
// Iterator to the flag of the first input element.
__host__ __device__
iterator begin() const
{
return m_begin;
}
// Iterator one past the flag of the last input element.
__host__ __device__
iterator end() const
{
return m_end;
}
// Flag at offset i from begin(); no bounds check is performed here.
template<typename OtherIndex>
__host__ __device__
typename iterator::reference operator[](OtherIndex i)
{
return *(begin() + i);
}
private:
// Declaration order matters: the constructors compute m_end from m_begin.
iterator m_begin, m_end;
};
// Convenience factory: deduces the iterator type and returns a head_flags
// range over [first, last) that uses the default predicate, flag type and
// index type.
template<typename RandomAccessIterator>
__host__ __device__
head_flags<RandomAccessIterator>
make_head_flags(RandomAccessIterator first, RandomAccessIterator last)
{
    typedef head_flags<RandomAccessIterator> flags_type;
    flags_type flags(first, last);
    return flags;
}
int main()
{
const int N = 6;
int src_ids[] = {3,1,2,2,3,3};
int dst_ids[] = {2,2,3,3,1,1};
float weights[] = {1,2,8,4,5,6};
thrust::device_vector<int> d_src_ids(src_ids,src_ids+N);
thrust::device_vector<int> d_dst_ids(dst_ids,dst_ids+N);
thrust::device_vector<float> d_weights(weights,weights+N);
std::cout << "--- initial values ---" << std::endl;
PRINTER(d_src_ids);
PRINTER(d_dst_ids);
PRINTER(d_weights);
auto z = thrust::make_zip_iterator(thrust::make_tuple(d_src_ids.begin(),d_dst_ids.begin(), d_weights.begin()));
thrust::sort(z,z+N);
std::cout << "--- after sort ---" << std::endl;
PRINTER(d_src_ids);
PRINTER(d_dst_ids);
PRINTER(d_weights);
auto z2 = thrust::make_zip_iterator(thrust::make_tuple(d_src_ids.begin(),d_dst_ids.begin()));
auto t = make_head_flags(z2,z2+N);
using namespace thrust::placeholders;
auto end = thrust::remove_if(z,z+N, t.begin(), !_1);
int new_size = thrust::get<0>(end.get_iterator_tuple()) - d_src_ids.begin();
d_src_ids.resize(new_size);
d_dst_ids.resize(new_size);
d_weights.resize(new_size);
std::cout << "--- after remove_if ---" << std::endl;
PRINTER(d_src_ids);
PRINTER(d_dst_ids);
PRINTER(d_weights);
return 0;
}
Run Code Online (Sandbox Code Playgroud)
输出:
--- initial values ---
d_src_ids: 3 1 2 2 3 3
d_dst_ids: 2 2 3 3 1 1
d_weights: 1 2 8 4 5 6
--- after sort ---
d_src_ids: 1 2 2 3 3 3
d_dst_ids: 2 3 3 1 1 2
d_weights: 2 4 8 5 6 1
--- after remove_if ---
d_src_ids: 1 2 3 3
d_dst_ids: 2 3 1 2
d_weights: 2 4 5 1
Run Code Online (Sandbox Code Playgroud)