SSE2 - 16字节对齐的动态内存分配

Jac*_*cob 0 c++ assembly memory-alignment sse2 visual-c++-2005

编辑:

这是"SSE2编译器错误"这一问题的后续.

这才是我之前真正遇到的错误;按照Michael Burr的建议修改_mm_malloc语句之后,我在下面重现了它:

SO.exe中0x00415116处的未处理异常:0xC0000005:访问冲突读取位置0xffffffff.

出错的行是: label: movdqa xmm0, xmmword ptr [t1+eax]

我正在尝试动态分配t1和t2,并按照这篇教程使用了_mm_malloc:

#include <emmintrin.h>
// Repro program: allocates two 16-byte-aligned int buffers with _mm_malloc,
// fills them, then runs an SSE2 inline-assembly loop that multiplies lanes of
// t1 and t2 with pmuludq. The products are written to mul1/mul2 and never read.
// argc/argv are unused. Always returns 0.
int main(int argc, char* argv[])
{ 
 int *t1, *t2;
 const int n = 100000;
 // Size argument is in bytes (n elements * sizeof(int)); 16 is the alignment.
 // NOTE(review): neither result is checked for NULL before being written to.
 t1 = (int*)_mm_malloc(n*sizeof(int),16);
 t2 = (int*)_mm_malloc(n*sizeof(int),16);
 // Scratch destinations for the asm loop; overwritten on every iteration.
 __m128i mul1, mul2;
  for (int j = 0; j < n; j++)
  {
  t1[j] = j;
  t2[j] = (j+1);
  } // fill with deterministic test values: t1[j] = j, t2[j] = j + 1
  _asm
  {
   // eax is used as a byte offset, advanced by 16 (one 128-bit vector) per pass.
   mov eax, 0
   // NOTE(review): this is the instruction reported to raise the access
   // violation. In MSVC inline asm [t1+eax] takes the address of the variable
   // t1 as the base, not the buffer address stored in t1 -- confirm and load
   // the pointer into a register first.
   label: movdqa xmm0, xmmword ptr [t1+eax]
   movdqa xmm1, xmmword ptr [t2+eax]
   // pmuludq multiplies the unsigned 32-bit lanes 0 and 2 into two 64-bit products.
   pmuludq xmm0, xmm1
   movdqa mul1, xmm0
   movdqa xmm0, xmmword ptr [t1+eax]
   // Shuffle 0x5f copies source lane 3 into lanes 0-1 and source lane 1 into
   // lanes 2-3, so the next pmuludq multiplies the odd-indexed elements.
   pshufd xmm0, xmm0, 05fh
   pshufd xmm1, xmm1, 05fh
   pmuludq xmm0, xmm1
   movdqa mul2, xmm0
   add eax, 16
   // Loop while eax < 100000 (signed compare).
   // NOTE(review): 100000 equals n, the element count, but eax counts bytes;
   // with 4-byte ints this would cover only the first quarter of each buffer.
   cmp eax, 100000
   jnge label
  }
     // Memory from _mm_malloc is released with _mm_free.
     _mm_free(t1);
     _mm_free(t2);

 return 0;
}
Run Code Online (Sandbox Code Playgroud)

Mic*_*urr 5

你没有分配足够的内存,应按元素个数乘以sizeof(int)来分配:

// The size passed to _mm_malloc is in bytes: n elements * sizeof(int), aligned to 16 bytes.
t1 = (int*)_mm_malloc(n * sizeof( int),16);
t2 = (int*)_mm_malloc(n * sizeof( int),16);
Run Code Online (Sandbox Code Playgroud)


Mic*_*urr 5

我认为第二个问题是:你是以指针变量本身的地址为基址加上偏移量来读取的(而不是以指针所指向的内存为基址).

更改:

// Form to be replaced: the address base here is the variable t1 itself, not the buffer it points to.
label: movdqa xmm0, xmmword ptr [t1+eax]
Run Code Online (Sandbox Code Playgroud)

改成类似这样:

// Load the pointer value stored in t1 into ebx once, before the loop label.
mov ebx, [t1]
// Index the buffer itself: base = ebx (buffer address), eax = byte offset.
label: movdqa xmm0, xmmword ptr [ebx+eax]
Run Code Online (Sandbox Code Playgroud)

通过t2指针进行的访问也要做类似的修改.

这可能会更好(虽然我没有机会测试它,所以它甚至可能不起作用):

  // Pointer-walking version of the multiply loop.
  //   eax - current position in the buffer pointed to by t1
  //   ebx - current position in the buffer pointed to by t2
  //   ecx - one-past-the-end address of t1's buffer (100000 ints * 4 bytes)
  // Each pass handles one 128-bit vector (four ints) from each buffer and
  // stores the 64-bit products into mul1 (elements 0 and 2) and mul2
  // (odd-indexed elements, selected by the 0x5f shuffle).
  _asm
  {
   mov eax, [t1]
   // Second operand must come from t2; loading t1 here would multiply t1 by itself.
   mov ebx, [t2]
   lea ecx, [eax + (100000*4)]

   label: movdqa xmm0, xmmword ptr [eax]
   movdqa xmm1, xmmword ptr [ebx]
   // pmuludq multiplies unsigned 32-bit lanes 0 and 2 into two 64-bit products.
   pmuludq xmm0, xmm1
   movdqa mul1, xmm0
   movdqa xmm0, xmmword ptr [eax]
   // Shuffle 0x5f moves source lane 3 into lanes 0-1 and lane 1 into lanes 2-3.
   pshufd xmm0, xmm0, 05fh
   pshufd xmm1, xmm1, 05fh
   pmuludq xmm0, xmm1
   movdqa mul2, xmm0
   add eax, 16
   add ebx, 16
   cmp eax, ecx
   // eax and ecx hold addresses, which are unsigned: use an unsigned jump
   // (jb) rather than the signed jnge.
   jb label
  }
Run Code Online (Sandbox Code Playgroud)