如何在android上更快地将RGB565转换为YUV420SP?

xuf*_*fan 4 rgb android jpeg arm yuv

我需要显示一张JPEG图片,并把它转换为YUV420SP.我先用SkBitmap解码JPEG并显示,然后用下面的代码把RGB565转换为YUV420SP,但转换一张640*480的RGB565图片需要75ms.有没有人知道在Android上把RGB565转换为YUV420SP的更快方法?或者在Android上把JPEG文件直接转换为YUV420SP的更快方法?

// Lookup tables for the RGB -> YUV420 conversion.
//
// Each table is indexed by an 8-bit channel value and is filled by
// InitLookupTable() with (coefficient * value * 256). The converter sums
// three entries, adds a pre-shifted offset and shifts right by 16.
// Suffix XY means "contribution of channel Y to component X".
int RGB2YUV_YR[256];   // R -> Y
int RGB2YUV_YG[256];   // G -> Y
int RGB2YUV_YB[256];   // B -> Y
int RGB2YUV_UR[256];   // R -> U (subtracted by the converter)
int RGB2YUV_UG[256];   // G -> U (subtracted by the converter)
int RGB2YUV_UBVR[256]; // shared table: B -> U and R -> V
int RGB2YUV_VG[256];   // G -> V (subtracted by the converter)
int RGB2YUV_VB[256];   // B -> V (subtracted by the converter)

//
// Fill the eight RGB -> YUV420 lookup tables.
//
// Entry [i] of each table is (coefficient * (i << 8)), truncated to int.
// The work is done on the first call only; a function-local flag makes
// every later call return immediately.
//
void InitLookupTable()
{
    static bool hasInited = false;

    if (!hasInited) {
        hasInited = true;

        // One pass over the 256 channel levels fills all tables at once.
        for (int level = 0; level < 256; ++level) {
            const int scaled = level << 8;

            // Luma contributions.
            RGB2YUV_YR[level] = (int) ((float) 65.481 * scaled);
            RGB2YUV_YG[level] = (int) ((float) 128.553 * scaled);
            RGB2YUV_YB[level] = (int) ((float) 24.966 * scaled);

            // Chroma contributions.
            RGB2YUV_UR[level] = (int) ((float) 37.797 * scaled);
            RGB2YUV_UG[level] = (int) ((float) 74.203 * scaled);
            RGB2YUV_VG[level] = (int) ((float) 93.786 * scaled);
            RGB2YUV_VB[level] = (int) ((float) 18.214 * scaled);

            // The same coefficient (112) serves B -> U and R -> V.
            RGB2YUV_UBVR[level] = (int) ((float) 112 * scaled);
        }
    }
}

/**
 * Convert an RGB565 image to YUV420SP and print per-stage timings.
 *
 * Output layout written to `yuv`: w*h luma bytes, followed by interleaved
 * chroma pairs (U at even offsets, V at odd offsets), one pair per 2x2
 * pixel block, i.e. w*h*3/2 bytes in total.
 *
 * @param w    image width in pixels; must be positive and even
 * @param h    image height in pixels; must be positive and even
 * @param bmp  w*h RGB565 pixels, read as native-endian uint16_t
 *             (NOTE(review): assumes bmp is 2-byte aligned - confirm at callers)
 * @param yuv  destination buffer of at least w*h*3/2 bytes
 * @return 1 on success, 0 if an argument is invalid or a scratch buffer
 *         could not be obtained
 */
int ConvertRGB5652YUV420SP(int w, int h, unsigned char *bmp, unsigned char *yuv)
{
    // %p requires void*, hence the casts.
    printf("ConvertRGB5652YUV420SP begin,w=%d,h=%d,bmp=%p,yuv=%p\n",
           w, h, (void *)bmp, (void *)yuv);

    struct timeval tpstart, tpend;
    gettimeofday(&tpstart, NULL);

    InitLookupTable();

    gettimeofday(&tpend, NULL);
    float timeuse = 1000000 * (tpend.tv_sec - tpstart.tv_sec) + tpend.tv_usec - tpstart.tv_usec;
    timeuse /= 1000;
    printf("InitLookupTable used time=%f\n", timeuse);
    gettimeofday(&tpstart, NULL);

    // Validate BEFORE allocating so that a rejected call cannot leak.
    // Odd dimensions are rejected: the 2x2 subsampling below would read
    // past the scratch planes and write past the w*h*3/2 output buffer.
    if (bmp == NULL || yuv == NULL || w <= 0 || h <= 0 || (w & 1) || (h & 1))
        return 0;

    // Widen before multiplying so the pixel count is not computed in int.
    const size_t pixels = (size_t)w * (size_t)h;

    // Full-resolution U and V scratch planes.
    unsigned char *uu = new unsigned char[pixels];
    unsigned char *vv = new unsigned char[pixels];
    if (uu == NULL || vv == NULL) {
        // Only reachable on toolchains where operator new returns NULL
        // instead of throwing; delete[] on NULL is a no-op.
        delete[] uu;
        delete[] vv;
        return 0;
    }

    // Pass 1: per-pixel Y into the output, full-resolution U/V into scratch.
    const uint16_t *src = (const uint16_t *)bmp;
    unsigned char *y = yuv;
    for (size_t n = 0; n < pixels; n++) {
        const uint16_t color = src[n];
        const unsigned int r = (color >> 11) & 0x1f;
        const unsigned int g = (color >> 5) & 0x3f;
        const unsigned int b = color & 0x1f;

        // Expand 5/6-bit fields to 8 bits by replicating the top bits.
        const unsigned char rValue = (r << 3) | (r >> 2);
        const unsigned char gValue = (g << 2) | (g >> 4);
        const unsigned char bValue = (b << 3) | (b >> 2);

        // 1048576 == 16 << 16 and 8388608 == 128 << 16: the offsets are
        // pre-shifted to match the 16-bit fixed-point sums.
        y[n] = (RGB2YUV_YR[rValue] + RGB2YUV_YG[gValue] + RGB2YUV_YB[bValue] +
                1048576) >> 16;
        uu[n] = (-RGB2YUV_UR[rValue] - RGB2YUV_UG[gValue] + RGB2YUV_UBVR[bValue] +
                 8388608) >> 16;
        vv[n] = (RGB2YUV_UBVR[rValue] - RGB2YUV_VG[gValue] - RGB2YUV_VB[bValue] +
                 8388608) >> 16;
    }

    gettimeofday(&tpend, NULL);
    timeuse = 1000000 * (tpend.tv_sec - tpstart.tv_sec) + tpend.tv_usec - tpstart.tv_usec;
    timeuse /= 1000;
    printf("Get YUV values used  time=%f\n", timeuse);
    gettimeofday(&tpstart, NULL);

    // Pass 2: average each 2x2 block of U and V and store them interleaved
    // (U first, then V) right after the luma plane.
    unsigned char *uv = yuv + pixels;
    for (int row = 0; row < h; row += 2) {
        const unsigned char *uTop = uu + (size_t)row * (size_t)w;
        const unsigned char *uBot = uTop + w;
        const unsigned char *vTop = vv + (size_t)row * (size_t)w;
        const unsigned char *vBot = vTop + w;

        for (int col = 0; col < w; col += 2) {
            *uv++ = (uTop[col] + uTop[col + 1] + uBot[col] + uBot[col + 1]) >> 2;
            *uv++ = (vTop[col] + vTop[col + 1] + vBot[col] + vBot[col + 1]) >> 2;
        }
    }

    gettimeofday(&tpend, NULL);
    timeuse = 1000000 * (tpend.tv_sec - tpstart.tv_sec) + tpend.tv_usec - tpstart.tv_usec;
    timeuse /= 1000;
    printf("Do sampling used time=%f\n", timeuse);

    // Memory obtained with new[] must be released with delete[].
    delete[] uu;
    delete[] vv;
    return 1;
}

/**
 * Benchmark driver: converts one zero-filled 640x480 RGB565 frame and
 * prints the total elapsed time in milliseconds.
 *
 * @return 0 on success, 1 if the conversion reports failure
 */
int main(int argc, char **argv) {
    // Static storage: together these buffers are about 1 MB, which is too
    // much to place on the stack safely. Static arrays are zero-initialised.
    static unsigned char bmp[640*480*2];
    static unsigned char yuv[(640*480*3)/2];
    (void)argc;
    (void)argv;

    struct timeval tpstart,tpend;
    gettimeofday(&tpstart,NULL);

    // The converter returns 0 on failure; do not report a timing for it.
    if (!ConvertRGB5652YUV420SP(640, 480, bmp, yuv)) {
        printf("ConvertRGB5652YUV420SP failed\n");
        return 1;
    }

    gettimeofday(&tpend,NULL);
    float timeuse=1000000*(tpend.tv_sec-tpstart.tv_sec)+tpend.tv_usec-tpstart.tv_usec;
    timeuse/=1000;  // microseconds -> milliseconds
    printf("ConvertARGB2YUV420SP used time=%f\n", timeuse);
    return 0;
}
Run Code Online (Sandbox Code Playgroud)

android上的输出(armv6):

ConvertRGB5652YUV420SP begin,w=640,h=480,bmp=0xbe7314fc,yuv=0xbe7c74fc
InitLookupTable used time=0.383000
Get YUV values used  time=61.394001
Do sampling used time=11.918000
ConvertARGB2YUV420SP used time=74.596001
Run Code Online (Sandbox Code Playgroud)

cpu信息:

$ cat /proc/cpuinfo
cat /proc/cpuinfo
Processor       : ARMv6-compatible processor rev 5 (v6l)
BogoMIPS        : 791.34
Features        : swp half thumb fastmult vfp edsp java
CPU implementer : 0x41
CPU architecture: 6TEJ
CPU variant     : 0x1
CPU part        : 0xb36
CPU revision    : 5

Hardware        : IMAPX200
Revision        : 0000
Serial          : 0000000000000000
Run Code Online (Sandbox Code Playgroud)

Jak*_*LEE 7

在ARMv7上,使用NEON.它将在不到1毫秒的时间内完成工作.(VGA)

如果您只能使用ARMv6,请用ARM汇编对其进行优化.(VGA上大约8ms)

使用定点算术而不是查找表.摆脱它们.

准备两个掩码:

  • 0x001f001f:mask1
  • 0x003f003f:mask2

然后将两个像素一次加载到32位寄存器(这比16位读取要快得多)

@ Split a 32-bit word holding two packed RGB565 pixels into per-channel
@ registers; each result keeps one field in the low halfword and one in
@ the high halfword. mask1 = 0x001f001f, mask2 = 0x003f003f.
@ red = (pixel >> 11) & mask1
and red, mask1, pixel, lsr #11
@ grn = (pixel >> 5) & mask2
and grn, mask2, pixel, lsr #5
@ blu = pixel & mask1
and blu, mask1, pixel
Run Code Online (Sandbox Code Playgroud)

现在你有三个寄存器,每个寄存器包含两个值 - 一个在低位,另一个在高16位.

smulxy指令(16位乘法)在这里会非常有用.

祝好运.

PS:您的查找表也不太理想.为什么它们的长度都是256?您可以把它们缩减到32项(r和b相关)和64项(g相关),这样可以提高缓存命中率.这样或许不用汇编就能达到40ms的目标.是的,缓存未命中的代价很高.