循环特征矩阵的最有效方法

ran*_*ser 9 c++ performance iterator eigen

我正在创建一些函数来做一些事情,比如负数和正数的分离和,kahan,成对和其他东西,其中我从矩阵中获取元素的顺序无关紧要,例如:

template <typename T, int R, int C>
inline T sum(const Eigen::Matrix<T,R,C>& xs)
{
  T sumP(0);
  T sumN(0);
  for (size_t i = 0, nRows = xs.rows(), nCols = xs.cols(); i < nRows; ++i)
   for (size_t j = 0; j < nCols; ++j)
   {
        if (xs(i,j)>0)
          sumP += xs(i,j);
        else if (xs(i,j)<0) //ignore 0 elements: improvement for sparse matrices I think
          sumN += xs(i,j);
   }
 return sumP+sumN;
}
Run Code Online (Sandbox Code Playgroud)

现在,我想尽可能提高效率,所以我的问题是,如上所述循环遍历每一行的每一列会更好,或者像下面这样做相反:

for (size_t i = 0, nRows = xs.rows(), nCols = xs.cols(); i < nCols; ++i)
  for (size_t j = 0; j < nRows; ++j)
Run Code Online (Sandbox Code Playgroud)

(我想这取决于矩阵元素在内存中的分配顺序,但我在Eigen的手册中找不到).

还有,还有其他替代方法,比如使用迭代器(它们存在于Eigen中吗?)可能会稍快一点?

ran*_*ser 17

我做了一些基准测试来检查哪种方式更快,我得到了以下结果(以秒为单位):

12
30
3
6
23
3
Run Code Online (Sandbox Code Playgroud)

第一行是按照@jleahy的建议进行迭代.第二行是迭代,就像我在问题中的代码中所做的那样(@jleahy的逆序).第三行是做迭代使用PlainObjectBase::data()这样for (int i = 0; i < matrixObject.size(); i++).其他3行重复与上面相同,但有一个临时的@ lucas92建议

我也做了相同的测试,但使用替换/ if else.*/for/else /(对稀疏矩阵没有特殊处理),我得到以下(在几秒钟内):

10
27
3
6
24
2
Run Code Online (Sandbox Code Playgroud)

再次进行测试给我的结果非常相似.我用g++ 4.7.3-O3.代码:

#include <ctime>
#include <iostream>
#include <Eigen/Dense>

using namespace std;

 template <typename T, int R, int C>
    inline T sum_kahan1(const Eigen::Matrix<T,R,C>& xs) {
      if (xs.size() == 0) return 0;
      T sumP(0);
      T sumN(0);
      T tP(0);
      T tN(0);
      T cP(0);
      T cN(0);
      T yP(0);
      T yN(0);
      for (size_t i = 0, nRows = xs.rows(), nCols = xs.cols(); i < nCols; ++i)
      for (size_t j = 0; j < nRows; ++j)
      {
          if (xs(j,i)>0)
          {
            yP = xs(j,i) - cP;
          tP = sumP + yP;
          cP = (tP - sumP) - yP;
          sumP = tP;
          }
        else if (xs(j,i)<0)
          {
            yN = xs(j,i) - cN;
          tN = sumN + yN;
          cN = (tN - sumN) - yN;
          sumN = tN;
          }
      }
      return sumP+sumN;
    }

 template <typename T, int R, int C>
    inline T sum_kahan2(const Eigen::Matrix<T,R,C>& xs) {
      if (xs.size() == 0) return 0;
      T sumP(0);
      T sumN(0);
      T tP(0);
      T tN(0);
      T cP(0);
      T cN(0);
      T yP(0);
      T yN(0);
      for (size_t i = 0, nRows = xs.rows(), nCols = xs.cols(); i < nRows; ++i)
      for (size_t j = 0; j < nCols; ++j)
      {
          if (xs(i,j)>0)
          {
            yP = xs(i,j) - cP;
          tP = sumP + yP;
          cP = (tP - sumP) - yP;
          sumP = tP;
          }
        else if (xs(i,j)<0)
          {
            yN = xs(i,j) - cN;
          tN = sumN + yN;
          cN = (tN - sumN) - yN;
          sumN = tN;
          }
      }
      return sumP+sumN;
    }


 template <typename T, int R, int C>
    inline T sum_kahan3(const Eigen::Matrix<T,R,C>& xs) {
      if (xs.size() == 0) return 0;
      T sumP(0);
      T sumN(0);
      T tP(0);
      T tN(0);
      T cP(0);
      T cN(0);
      T yP(0);
      T yN(0);
    for (size_t i = 0, size = xs.size(); i < size; i++)
      {
          if ((*(xs.data() + i))>0)
          {
            yP = (*(xs.data() + i)) - cP;
          tP = sumP + yP;
          cP = (tP - sumP) - yP;
          sumP = tP;
          }
        else if ((*(xs.data() + i))<0)
          {
            yN = (*(xs.data() + i)) - cN;
          tN = sumN + yN;
          cN = (tN - sumN) - yN;
          sumN = tN;
          }
      }
      return sumP+sumN;
    }

 template <typename T, int R, int C>
    inline T sum_kahan1t(const Eigen::Matrix<T,R,C>& xs) {
      if (xs.size() == 0) return 0;
      T sumP(0);
      T sumN(0);
      T tP(0);
      T tN(0);
      T cP(0);
      T cN(0);
      T yP(0);
      T yN(0);
      for (size_t i = 0, nRows = xs.rows(), nCols = xs.cols(); i < nCols; ++i)
      for (size_t j = 0; j < nRows; ++j)
      {
      T temporary = xs(j,i);
          if (temporary>0)
          {
            yP = temporary - cP;
          tP = sumP + yP;
          cP = (tP - sumP) - yP;
          sumP = tP;
          }
        else if (temporary<0)
          {
            yN = temporary - cN;
          tN = sumN + yN;
          cN = (tN - sumN) - yN;
          sumN = tN;
          }
      }
      return sumP+sumN;
    }

 template <typename T, int R, int C>
    inline T sum_kahan2t(const Eigen::Matrix<T,R,C>& xs) {
      if (xs.size() == 0) return 0;
      T sumP(0);
      T sumN(0);
      T tP(0);
      T tN(0);
      T cP(0);
      T cN(0);
      T yP(0);
      T yN(0);
      for (size_t i = 0, nRows = xs.rows(), nCols = xs.cols(); i < nRows; ++i)
      for (size_t j = 0; j < nCols; ++j)
      {
      T temporary = xs(i,j);
          if (temporary>0)
          {
            yP = temporary - cP;
          tP = sumP + yP;
          cP = (tP - sumP) - yP;
          sumP = tP;
          }
        else if (temporary<0)
          {
            yN = temporary - cN;
          tN = sumN + yN;
          cN = (tN - sumN) - yN;
          sumN = tN;
          }
      }
      return sumP+sumN;
    }


 template <typename T, int R, int C>
    inline T sum_kahan3t(const Eigen::Matrix<T,R,C>& xs) {
      if (xs.size() == 0) return 0;
      T sumP(0);
      T sumN(0);
      T tP(0);
      T tN(0);
      T cP(0);
      T cN(0);
      T yP(0);
      T yN(0);
    for (size_t i = 0, size = xs.size(); i < size; i++)
      {
      T temporary = (*(xs.data() + i));
          if (temporary>0)
          {
            yP = temporary - cP;
          tP = sumP + yP;
          cP = (tP - sumP) - yP;
          sumP = tP;
          }
        else if (temporary<0)
          {
            yN = temporary - cN;
          tN = sumN + yN;
          cN = (tN - sumN) - yN;
          sumN = tN;
          }
      }
      return sumP+sumN;
    }

 template <typename T, int R, int C>
    inline T sum_kahan1e(const Eigen::Matrix<T,R,C>& xs) {
      if (xs.size() == 0) return 0;
      T sumP(0);
      T sumN(0);
      T tP(0);
      T tN(0);
      T cP(0);
      T cN(0);
      T yP(0);
      T yN(0);
      for (size_t i = 0, nRows = xs.rows(), nCols = xs.cols(); i < nCols; ++i)
      for (size_t j = 0; j < nRows; ++j)
      {
          if (xs(j,i)>0)
          {
            yP = xs(j,i) - cP;
          tP = sumP + yP;
          cP = (tP - sumP) - yP;
          sumP = tP;
          }
        else
          {
            yN = xs(j,i) - cN;
          tN = sumN + yN;
          cN = (tN - sumN) - yN;
          sumN = tN;
          }
      }
      return sumP+sumN;
    }

 template <typename T, int R, int C>
    inline T sum_kahan2e(const Eigen::Matrix<T,R,C>& xs) {
      if (xs.size() == 0) return 0;
      T sumP(0);
      T sumN(0);
      T tP(0);
      T tN(0);
      T cP(0);
      T cN(0);
      T yP(0);
      T yN(0);
      for (size_t i = 0, nRows = xs.rows(), nCols = xs.cols(); i < nRows; ++i)
      for (size_t j = 0; j < nCols; ++j)
      {
          if (xs(i,j)>0)
          {
            yP = xs(i,j) - cP;
          tP = sumP + yP;
          cP = (tP - sumP) - yP;
          sumP = tP;
          }
        else
          {
            yN = xs(i,j) - cN;
          tN = sumN + yN;
          cN = (tN - sumN) - yN;
          sumN = tN;
          }
      }
      return sumP+sumN;
    }


 template <typename T, int R, int C>
    inline T sum_kahan3e(const Eigen::Matrix<T,R,C>& xs) {
      if (xs.size() == 0) return 0;
      T sumP(0);
      T sumN(0);
      T tP(0);
      T tN(0);
      T cP(0);
      T cN(0);
      T yP(0);
      T yN(0);
    for (size_t i = 0, size = xs.size(); i < size; i++)
      {
          if ((*(xs.data() + i))>0)
          {
            yP = (*(xs.data() + i)) - cP;
          tP = sumP + yP;
          cP = (tP - sumP) - yP;
          sumP = tP;
          }
        else
          {
            yN = (*(xs.data() + i)) - cN;
          tN = sumN + yN;
          cN = (tN - sumN) - yN;
          sumN = tN;
          }
      }
      return sumP+sumN;
    }

 template <typename T, int R, int C>
    inline T sum_kahan1te(const Eigen::Matrix<T,R,C>& xs) {
      if (xs.size() == 0) return 0;
      T sumP(0);
      T sumN(0);
      T tP(0);
      T tN(0);
      T cP(0);
      T cN(0);
      T yP(0);
      T yN(0);
      for (size_t i = 0, nRows = xs.rows(), nCols = xs.cols(); i < nCols; ++i)
      for (size_t j = 0; j < nRows; ++j)
      {
      T temporary = xs(j,i);
          if (temporary>0)
          {
            yP = temporary - cP;
          tP = sumP + yP;
          cP = (tP - sumP) - yP;
          sumP = tP;
          }
        else
          {
            yN = temporary - cN;
          tN = sumN + yN;
          cN = (tN - sumN) - yN;
          sumN = tN;
          }
      }
      return sumP+sumN;
    }

 template <typename T, int R, int C>
    inline T sum_kahan2te(const Eigen::Matrix<T,R,C>& xs) {
      if (xs.size() == 0) return 0;
      T sumP(0);
      T sumN(0);
      T tP(0);
      T tN(0);
      T cP(0);
      T cN(0);
      T yP(0);
      T yN(0);
      for (size_t i = 0, nRows = xs.rows(), nCols = xs.cols(); i < nRows; ++i)
      for (size_t j = 0; j < nCols; ++j)
      {
      T temporary = xs(i,j);
          if (temporary>0)
          {
            yP = temporary - cP;
          tP = sumP + yP;
          cP = (tP - sumP) - yP;
          sumP = tP;
          }
        else
          {
            yN = temporary - cN;
          tN = sumN + yN;
          cN = (tN - sumN) - yN;
          sumN = tN;
          }
      }
      return sumP+sumN;
    }


 template <typename T, int R, int C>
    inline T sum_kahan3te(const Eigen::Matrix<T,R,C>& xs) {
      if (xs.size() == 0) return 0;
      T sumP(0);
      T sumN(0);
      T tP(0);
      T tN(0);
      T cP(0);
      T cN(0);
      T yP(0);
      T yN(0);
    for (size_t i = 0, size = xs.size(); i < size; i++)
      {
      T temporary = (*(xs.data() + i));
          if (temporary>0)
          {
            yP = temporary - cP;
          tP = sumP + yP;
          cP = (tP - sumP) - yP;
          sumP = tP;
          }
        else
          {
            yN = temporary - cN;
          tN = sumN + yN;
          cN = (tN - sumN) - yN;
          sumN = tN;
          }
      }
      return sumP+sumN;
    }


int main() {

    Eigen::Matrix<double, Eigen::Dynamic, Eigen::Dynamic> test = Eigen::Matrix<double, Eigen::Dynamic, Eigen::Dynamic>::Random(10000,10000);

    cout << "start" << endl;   
    int now;

    now = time(0);
    sum_kahan1(test);
    cout << time(0) - now << endl;

    now = time(0);
    sum_kahan2(test);
    cout << time(0) - now << endl;

    now = time(0);
    sum_kahan3(test);
    cout << time(0) - now << endl;

    now = time(0);
    sum_kahan1t(test);
    cout << time(0) - now << endl;

    now = time(0);
    sum_kahan2t(test);
    cout << time(0) - now << endl;

    now = time(0);
    sum_kahan3t(test);
    cout << time(0) - now << endl;

    now = time(0);
    sum_kahan1e(test);
    cout << time(0) - now << endl;

    now = time(0);
    sum_kahan2e(test);
    cout << time(0) - now << endl;

    now = time(0);
    sum_kahan3e(test);
    cout << time(0) - now << endl;

    now = time(0);
    sum_kahan1te(test);
    cout << time(0) - now << endl;

    now = time(0);
    sum_kahan2te(test);
    cout << time(0) - now << endl;

    now = time(0);
    sum_kahan3te(test);
    cout << time(0) - now << endl;

    return 0;
}
Run Code Online (Sandbox Code Playgroud)

  • 基准!美丽的。您会惊讶地发现有多少人拒绝对他们的代码进行基准测试。我很惊讶使用 .data 速度如此之快,这可能表明 eigen 正在 .cols、.rows 和 .xs 函数中做一些工作。您可以尝试将 xs.size() 和 xs.data() 提升到循环之外,如果是这种情况,这可能会更有帮助(尽管不太可能值得一试)。 (2认同)

jle*_*ahy 7

默认情况下,Eigen以列主(Fortran)顺序分配矩阵(文档).

迭代矩阵的最快方法是按存储顺序执行,以错误的方式执行此操作会增加缓存未命中数(如果矩阵不适合L1,则会占用您的计算时间,因此读取会增加计算时间)通过cacheline/elemsize的因子(可能是64/8 = 8).

如果您的矩阵适合L1缓存,这将没有什么区别,但一个好的编译器应该能够矢量化循环,启用AVX(在一个闪亮的新核心i7上)可以为您提供多达4倍的加速.(256位/ 64位).

最后不要指望任何Eigen的内置函数能给你加速(我不认为有任何迭代器,但我可能会弄错),它们只会给你相同的(非常简单) )代码.

TLDR:交换迭代顺序,您需要最快地改变行索引.

  • 您发送的链接中的页面使用了 `PlainObjectBase::data()` 像这样 `for (int i = 0; i &lt; Acolmajor.size(); i++)`,我没有发现这个函数存在,也许它是这样像简单的循环一样快,我不必担心它是否是列和行主序矩阵。我将做一些基准测试来检查这一点。感谢您的回答和链接! (2认同)