foreach vs for:请解释汇编代码差异

Asi*_*sik 8 c# performance x86 assembly

我最近一直在测试C#中for循环与foreach循环的性能,我注意到,在把一个int数组累加求和到一个long时,foreach循环实际上可能更快.这是完整的测试程序,我使用的是Visual Studio 2012,x86,Release模式,开启优化.

这是两个循环的汇编代码.foreach:

            long sum = 0;
00000000  push        ebp 
00000001  mov         ebp,esp 
00000003  push        edi 
00000004  push        esi 
00000005  push        ebx 
00000006  xor         ebx,ebx 
00000008  xor         edi,edi 
            foreach (var i in collection) {
0000000a  xor         esi,esi 
0000000c  cmp         dword ptr [ecx+4],0 
00000010  jle         00000025 
00000012  mov         eax,dword ptr [ecx+esi*4+8] 
                sum += i;
00000016  mov         edx,eax 
00000018  sar         edx,1Fh 
0000001b  add         ebx,eax 
0000001d  adc         edi,edx 
0000001f  inc         esi 
            foreach (var i in collection) {
00000020  cmp         dword ptr [ecx+4],esi 
00000023  jg          00000012 
            }
            return sum;
00000025  mov         eax,ebx 
00000027  mov         edx,edi 
00000029  pop         ebx 
0000002a  pop         esi 
0000002b  pop         edi 
0000002c  pop         ebp 
0000002d  ret 
Run Code Online (Sandbox Code Playgroud)

以及for:

    long sum = 0;
00000000  push        ebp 
00000001  mov         ebp,esp 
00000003  push        edi 
00000004  push        esi 
00000005  push        ebx 
00000006  push        eax 
00000007  xor         ebx,ebx 
00000009  xor         edi,edi 
            for (int i = 0; i < collection.Length; ++i) {
0000000b  xor         esi,esi 
0000000d  mov         eax,dword ptr [ecx+4] 
00000010  mov         dword ptr [ebp-10h],eax 
00000013  test        eax,eax 
00000015  jle         0000002A 
                sum += collection[i];
00000017  mov         eax,dword ptr [ecx+esi*4+8] 
0000001b  cdq 
0000001c  add         eax,ebx 
0000001e  adc         edx,edi 
00000020  mov         ebx,eax 
00000022  mov         edi,edx 
            for (int i = 0; i < collection.Length; ++i) {
00000024  inc         esi 
00000025  cmp         dword ptr [ebp-10h],esi 
00000028  jg          00000017 
            }
            return sum;
0000002a  mov         eax,ebx 
0000002c  mov         edx,edi 
0000002e  pop         ecx 
0000002f  pop         ebx 
00000030  pop         esi 
00000031  pop         edi 
00000032  pop         ebp 
00000033  ret
Run Code Online (Sandbox Code Playgroud)

如您所见,主循环在"foreach"版本中是7条指令,在"for"版本中是9条指令.这在我的基准测试中转化为大约10%的性能差异.

我不太擅长阅读汇编代码,不明白为什么for循环不如foreach高效.这里发生了什么?

Bla*_*ear 8

由于数组非常大,唯一相关的部分显然是循环内部的那一段,即:

// for loop
00000017  mov         eax,dword ptr [ecx+esi*4+8] 
0000001b  cdq 
0000001c  add         eax,ebx 
0000001e  adc         edx,edi 
00000020  mov         ebx,eax 
00000022  mov         edi,edx 

// foreach loop
00000012  mov         eax,dword ptr [ecx+esi*4+8] 
00000016  mov         edx,eax 
00000018  sar         edx,1Fh 
0000001b  add         ebx,eax 
0000001d  adc         edi,edx 
Run Code Online (Sandbox Code Playgroud)

由于sum是一个long,它存储在两个不同的寄存器中:ebx保存其低位的四个字节,edi保存其高位的四个字节.两个版本的区别在于collection[i](隐式)从int转换为long的方式:

// for loop
0000001b  cdq 

// foreach loop
00000016  mov         edx,eax 
00000018  sar         edx,1Fh 
Run Code Online (Sandbox Code Playgroud)

需要注意的另一个重要事项是for循环版本以"反向"顺序执行求和:

long temp = (long) collection[i];   // implicit cast, stored in edx:eax
temp += sum;                        // instead of "simply" sum += temp
sum = temp;                         // sum is stored back into edi:ebx
Run Code Online (Sandbox Code Playgroud)

我无法告诉你为什么编译器更倾向于这种方式而不是sum += temp(@EricLippert也许能告诉我们 :)),但我怀疑这与可能出现的某些指令依赖问题有关.


Pet*_*ter 5

好的,下面是带注释的汇编代码版本;你会看到,两个循环中的指令非常接近.

            foreach (var i in collection) {
0000000a  xor         esi,esi                       clear index
0000000c  cmp         dword ptr [ecx+4],0           compare size of collection with 0
00000010  jle         00000025                      exit if empty
00000012  mov         eax,dword ptr [ecx+esi*4+8]   get item from collection
                sum += i;
00000016  mov         edx,eax                       move to edx:eax
00000018  sar         edx,1Fh                       shift 31 bits to keep sign only
0000001b  add         ebx,eax                       add to sum
0000001d  adc         edi,edx                       add with carry from previous add
0000001f  inc         esi                           increment index
            foreach (var i in collection) {
00000020  cmp         dword ptr [ecx+4],esi         compare size to index
00000023  jg          00000012                      loop if more
            }
            return sum;
00000025  mov         eax,ebx                       result was in ebx
=================================================
            for (int i = 0; i < collection.Length; ++i) {
0000000b  xor         esi,esi                       clear index
0000000d  mov         eax,dword ptr [ecx+4]         get limit on for
00000010  mov         dword ptr [ebp-10h],eax       save limit
00000013  test        eax,eax                       test if limit is empty
00000015  jle         0000002A                      exit loop if empty
                sum += collection[i];
00000017  mov         eax,dword ptr [ecx+esi*4+8]   get item from collection
0000001b  cdq                                       convert eax to edx:eax
0000001c  add         eax,ebx                       add to sum
0000001e  adc         edx,edi                       add with carry from previous add
00000020  mov         ebx,eax                       put result in edi:ebx
00000022  mov         edi,edx 
            for (int i = 0; i < collection.Length; ++i) {
00000024  inc         esi                           increment index
00000025  cmp         dword ptr [ebp-10h],esi       compare to limit
00000028  jg          00000017                      loop if more
            }
            return sum;
0000002a  mov         eax,ebx                       result was in ebx
Run Code Online (Sandbox Code Playgroud)