我正在对 BGR 到灰度的转换做 SIMD 优化,其功能相当于 OpenCV 的 cvtColor()。该功能已有一个 Intel SSE 版本,我参考的就是它。(我所做的基本上是把 SSE 代码移植为 NEON 代码。)
我差不多完成了代码的编写,也可以用 g++ 编译通过,但是无法得到正确的输出。有没有人知道错误出在哪里?
我实际得到的结果(不正确):

我期望得到的结果:

这是我的代码:
#include <opencv/cv.hpp>
#include <opencv/highgui.h>
#include <arm_neon.h>
//#include <iostream>
using namespace std;
//using namespace cv;
// Splits a 128-bit int8x16_t vector `v` into its low and high 64-bit halves
// (vget_low_s8 / vget_high_s8) and packs them into an int8x8x2_t.
// NOTE(review): presumably meant to build the two-register table operand for
// vtbl2_s8-style byte lookups — the call sites are not visible here; confirm.
// NOTE(review): the `(type){ ... }` compound-literal form is a GNU extension
// when compiled as C++, and `v` is expanded twice, so avoid arguments with
// side effects.
#define int8x16_to_8x8x2(v) ((int8x8x2_t) { vget_low_s8(v), vget_high_s8(v) })
void cvtBGR2GrayNEON(cv::Mat& src, cv::Mat& dest)
{
const int size = src.size().area()*src.channels();
uchar* s = src.ptr<uchar>(0);
uchar* d = dest.ptr<uchar>(0);
const int8x16_t mask1 = {0,3,6,9,12,15,1,4,7,10,13,2,5,8,11,14};
const int8x16_t smask1 = {6,7,8,9,10,0,1,2,3,4,5,11,12,13,14,15};
const int8x16_t ssmask1 = {11,12,13,14,15,0,1,2,3,4,5,6,7,8,9,10};
const int8x16_t mask2 = {0,3,6,9,12,15, 2,5,8,11,14,1,4,7,10,13};
const int8x16_t ssmask2 = {0,1,2,3,4,11,12,13,14,15,5,6,7,8,9,10};
const int8x16_t bmask1 = …Run Code Online (Sandbox Code Playgroud)