C#中泛型与非泛型的性能对比

Dmi*_*try 25 c# generics performance templates

我写了两个等价的方法:

// Generic variant under test: returns the result of `a == b` for two
// arguments of the same type T, where T is constrained to reference types.
static bool F<T>(T a, T b) where T : class
{
    return a == b;
}

// Non-generic variant under test: returns the result of `a == b` for two
// arguments statically typed as A.
static bool F2(A a, A b)
{
    return a == b;
}
Run Code Online (Sandbox Code Playgroud)

时差:
00:00:00.0380022
00:00:00.0170009

测试代码:

// Times 100,000,000 calls of the generic F<A> and then of the non-generic F2,
// printing the wall-clock span of each loop.
var a = new A();

// Timestamp taken immediately before the generic loop so the first printed
// span covers only that loop.
var dt = DateTime.Now;
for (int i = 0; i < 100000000; i++)
    F<A>(a, a);
Console.WriteLine(DateTime.Now - dt);

// Reset the timestamp so the second span covers only the non-generic loop.
dt = DateTime.Now;
for (int i = 0; i < 100000000; i++)
    F2(a, a);
Console.WriteLine(DateTime.Now - dt);
Run Code Online (Sandbox Code Playgroud)

有谁知道为什么?

在下面的评论中,dtb*显示CIL:

IL for F2: ldarg.0, ldarg.1, ceq, ret. IL for F<T>: ldarg.0, box !!T, ldarg.1, box !!T, ceq, ret.
Run Code Online (Sandbox Code Playgroud)

我认为这就是我问题的答案,但我可以用什么魔法来避免装箱(boxing)?

接下来我使用Psilon的代码:

using System;
using System.Collections.Generic;
using System.Diagnostics;
using System.Linq;

namespace ConsoleApplication58
{
    // Micro-benchmark that compares the call cost of a generic and a
    // non-generic equality helper over many rounds and prints the averages.
    internal class Program
    {
        // Empty reference type used as the argument for both helpers.
        private class A
        {

        }

        // Generic variant under test: returns `a == b` for a reference type T.
        private static bool F<T>(T a, T b) where T : class
        {
            return a == b;
        }

        // Non-generic variant under test: returns `a == b` for two A references.
        private static bool F2(A a, A b)
        {
            return a == b;
        }

        // Runs `rounds` rounds of `n` calls for each helper, then prints the
        // average elapsed time (milliseconds and ticks) and the F/F2 ratio.
        private static void Main()
        {
            const int rounds = 100, n = 10000000;
            var a = new A();
            var fList = new List<TimeSpan>(rounds);
            var f2List = new List<TimeSpan>(rounds);

            // Warm-up: invoke each helper once before any timing starts, so
            // one-time first-call costs are not charged to the first round.
            // The results seed `sink`, which every timed call folds into.
            bool sink = F(a, a) & F2(a, a);

            for (int i = 0; i < rounds; i++)
            {
                // Test generic
                GCClear();
                var sw = Stopwatch.StartNew();
                for (int j = 0; j < n; j++)
                {
                    // Fold the result into `sink` so the call has an
                    // observable effect instead of being discarded.
                    sink &= F(a, a);
                }
                sw.Stop();
                fList.Add(sw.Elapsed);

                // Test not-generic
                GCClear();
                var sw2 = Stopwatch.StartNew();
                for (int j = 0; j < n; j++)
                {
                    sink &= F2(a, a);
                }
                sw2.Stop();
                f2List.Add(sw2.Elapsed);
            }

            // Consume the accumulated result after all timing is finished.
            GC.KeepAlive(sink);

            double f1AverageTicks = fList.Average(ts => ts.Ticks);
            Console.WriteLine("Elapsed for F = {0} \t ticks = {1}", fList.Average(ts => ts.TotalMilliseconds),
                              f1AverageTicks);
            double f2AverageTicks = f2List.Average(ts => ts.Ticks);
            Console.WriteLine("Elapsed for F2 = {0} \t ticks = {1}", f2List.Average(ts => ts.TotalMilliseconds),
                  f2AverageTicks);
            Console.WriteLine("Not-generic method is {0} times faster, or on {1}%", f1AverageTicks/f2AverageTicks,
                              (f1AverageTicks/f2AverageTicks - 1)*100);
            Console.ReadKey();
        }

        // Forces a full collection, waits for pending finalizers, then collects
        // again, so each timed section starts right after a collection.
        private static void GCClear()
        {
            GC.Collect();
            GC.WaitForPendingFinalizers();
            GC.Collect();
        }
    }
}
Run Code Online (Sandbox Code Playgroud)

Windows 7,.NET 4.5,Visual Studio 2012,Release 配置,启用优化,未附加调试器。

64位

Elapsed for F = 23.68157         ticks = 236815.7
Elapsed for F2 = 1.701638        ticks = 17016.38
Not-generic method is 13.916925926666 times faster, or on 1291.6925926666%
Run Code Online (Sandbox Code Playgroud)

x86

Elapsed for F = 6.713223         ticks = 67132.23
Elapsed for F2 = 6.729897        ticks = 67298.97
Not-generic method is 0.997522398931217 times faster, or on -0.247760106878314%
Run Code Online (Sandbox Code Playgroud)

而且我有了新的魔力:x64快了三倍......

PS:我的目标平台是x64.

Alo*_*aus 20

我确实对您的代码进行了一些更改,以正确测量性能.

  1. 使用秒表
  2. 执行发布模式
  3. 防止内联.
  4. 使用GetHashCode()做一些真正的工作
  5. 查看生成的汇编代码

这是代码:

// Empty reference type; instances are only passed to F<T> and F2 as arguments.
class A
{
}

// Generic variant under test: reports whether the two arguments return the
// same hash code. Marked NoInlining so the call itself is part of the measurement.
[MethodImpl(MethodImplOptions.NoInlining)]
static bool F<T>(T a, T b) where T : class
{
    // Hash `a` first, then `b`, matching left-to-right evaluation.
    int firstHash = a.GetHashCode();
    int secondHash = b.GetHashCode();
    return firstHash == secondHash;
}

// Non-generic variant under test: reports whether the two A instances return
// the same hash code. Marked NoInlining so the call itself is part of the measurement.
[MethodImpl(MethodImplOptions.NoInlining)]
static bool F2(A a, A b)
{
    // Hash `a` first, then `b`, matching left-to-right evaluation.
    int firstHash = a.GetHashCode();
    int secondHash = b.GetHashCode();
    return firstHash == secondHash;
}

// Times 100,000,000 calls of F<A> and then of F2, printing each duration in
// seconds. Returns 1 when the untimed F2 call reports equality, otherwise 0.
static int Main(string[] args)
{
    const int Runs = 100 * 1000 * 1000;
    var instance = new A();

    // Untimed call before the generic loop; its result is not used.
    F<A>(instance, instance);
    var timer = Stopwatch.StartNew();
    for (int run = 0; run < Runs; run++)
    {
        F<A>(instance, instance);
    }
    timer.Stop();
    Console.WriteLine("Generic: {0:F2}s", timer.Elapsed.TotalSeconds);

    // Untimed call before the non-generic loop; its result becomes the exit code.
    bool equal = F2(instance, instance);
    timer.Restart();
    for (int run = 0; run < Runs; run++)
    {
        F2(instance, instance);
    }
    timer.Stop();
    Console.WriteLine("Non Generic: {0:F2}s", timer.Elapsed.TotalSeconds);

    if (equal)
    {
        return 1;
    }
    return 0;
}
Run Code Online (Sandbox Code Playgroud)

During my tests the non-generic version was slightly faster (.NET 4.5 x32 Windows 7), but there is practically no measurable difference in speed; I would say they are both equal. For completeness, here is the assembly code of the generic version. I got the assembly code via the debugger in Release mode with JIT optimizations enabled. The default is to disable JIT optimizations during debugging to make setting breakpoints and inspecting variables easier.

Generic

// Source of the generic method whose x86 disassembly follows.
static bool F<T>(T a, T b) where T : class
{
        return a.GetHashCode() == b.GetHashCode();
}

push        ebp 
mov         ebp,esp 
push        ebx 
sub         esp,8 // reserve stack for two locals 
mov         dword ptr [ebp-8],ecx // store first arg on stack
mov         dword ptr [ebp-0Ch],edx // store second arg on stack
mov         ecx,dword ptr [ebp-8] // get first arg from stack --> stupid!
mov         eax,dword ptr [ecx]   // load MT pointer from a instance
mov         eax,dword ptr [eax+28h] // Locate method table start
call        dword ptr [eax+8] //GetHashCode // call GetHashCode function pointer which is the second method starting from the method table
mov         ebx,eax           // store result in ebx
mov         ecx,dword ptr [ebp-0Ch] // get second arg
mov         eax,dword ptr [ecx]     // call method as usual ...
mov         eax,dword ptr [eax+28h] 
call        dword ptr [eax+8] //GetHashCode
cmp         ebx,eax 
sete        al 
movzx       eax,al 
lea         esp,[ebp-4] 
pop         ebx 
pop         ebp 
ret         4 
Run Code Online (Sandbox Code Playgroud)

Non Generic

// Source of the non-generic method whose x86 disassembly follows.
static bool F2(A a, A b)
{
  return a.GetHashCode() == b.GetHashCode();
}

push        ebp 
mov         ebp,esp 
push        esi 
push        ebx 
mov         esi,edx 
mov         eax,dword ptr [ecx] 
mov         eax,dword ptr [eax+28h] 
call        dword ptr [eax+8] //GetHashCode
mov         ebx,eax 
mov         ecx,esi 
mov         eax,dword ptr [ecx] 
mov         eax,dword ptr [eax+28h] 
call        dword ptr [eax+8] //GetHashCode
cmp         ebx,eax 
sete        al 
movzx       eax,al 
pop         ebx 
pop         esi 
pop         ebp 
ret 
Run Code Online (Sandbox Code Playgroud)

As you can see, the generic version looks slightly less efficient because of its additional stack memory operations, but in reality the difference is not measurable: everything fits into the processor's L1 cache, which keeps those memory operations cheap relative to the pure register operations of the non-generic version. I would suspect that the non-generic version performs a little better in the real world if you have to pay for real memory accesses that are not served from any CPU cache.

For all practical purposes both methods are identical. You should look elsewhere for real-world performance gains. I would first look at the data access patterns and the data structures used. Algorithmic changes tend to bring much more performance gain than such low-level details.

Edit1: If you want to use == then you will find

00000000  push        ebp 
00000001  mov         ebp,esp 
00000003  cmp         ecx,edx // Check for reference equality 
00000005  sete        al 
00000008  movzx       eax,al 
0000000b  pop         ebp 
0000000c  ret         4 
Run Code Online (Sandbox Code Playgroud)

Both methods produce exactly the same machine code. Any difference you measured is measurement error.

  • 谁在乎IL?机器代码是一样的. (8认同)

Sco*_*ain 6

您的测试方法存在缺陷.如何做到这一点有几个大问题.

首先,你没有提供" 热身 ".在.NET中,第一次访问它时,它将比后续调用慢,因此它可以加载任何所需的程序集.如果您要执行这样的测试,您必须至少执行一次每个功能,否则第一次运行测试将受到很大的惩罚.继续交换订单,您可能会看到相反的结果.

第二,DateTime 仅精确到约 16ms,因此比较两个时间时,误差可达 +/-32ms。两个结果之间的差异是 21 毫秒,完全在实验误差范围内。您必须使用更精确的计时器,例如 Stopwatch 类。

最后,不要做这样的人为测试。除了为某一方争得吹嘘的资本之外,它们不会向您提供任何有用的信息。而应学会使用代码分析器(Code Profiler)。它会向您展示是什么在拖慢您的代码,您可以据此就如何解决问题做出明智的决定,而不是"猜测"不使用泛型类会使您的代码更快。

这是一个示例代码,显示它应该如何"完成":

using System;
using System.Diagnostics;

namespace Sandbox_Console
{
    // Empty reference type; instances are only passed to F<T> and F2.
    class A
    {
    }

    // Times the generic and non-generic comparison helpers twice: once under
    // the "warmup" heading and once under the "real" heading.
    internal static class Program
    {
        // Number of calls made inside every timed loop.
        private const int Iterations = 100000000;

        // Generic variant under test: returns `a == b` for a reference type T.
        static bool F<T>(T a, T b) where T : class
        {
            return a == b;
        }

        // Non-generic variant under test: returns `a == b` for two A references.
        static bool F2(A a, A b)
        {
            return a == b;
        }

        // Prints the elapsed time of each loop, then waits for a line of input.
        private static void Main()
        {
            var instance = new A();

            Console.WriteLine("warmup");
            var timer = Stopwatch.StartNew();
            for (int k = 0; k < Iterations; k++)
            {
                F<A>(instance, instance);
            }
            Console.WriteLine(timer.Elapsed);

            timer.Restart();
            for (int k = 0; k < Iterations; k++)
            {
                F2(instance, instance);
            }
            Console.WriteLine(timer.Elapsed);

            Console.WriteLine("real");
            timer.Restart();
            for (int k = 0; k < Iterations; k++)
            {
                F<A>(instance, instance);
            }
            Console.WriteLine(timer.Elapsed);

            timer.Restart();
            for (int k = 0; k < Iterations; k++)
            {
                F2(instance, instance);
            }
            Console.WriteLine(timer.Elapsed);

            Console.WriteLine("Done");
            Console.ReadLine();
        }

    }
}
Run Code Online (Sandbox Code Playgroud)

以下是结果:

warmup
00:00:00.0297904
00:00:00.0298949
real
00:00:00.0296838
00:00:00.0297823
Done
Run Code Online (Sandbox Code Playgroud)

交换最后两个测试的顺序后,总是先运行的那个耗时更短,因此在实验误差范围内,二者实际上"耗时相同"。


Ben*_*igt 5

不要担心时间安排,担心正确性.

这两个方法并不等价。其中一个使用 class A 的 operator==,另一个使用 object 的 operator==。

  • 因此,除非类A重载operator ==,否则两者都使用对象的运算符==,因此*是*等价的. (6认同)

Dan*_*zey 3

两件事情:

  1. 您正在使用 DateTime.Now 进行基准测试。请改用 Stopwatch。
  2. 您正在运行非正常情况下的代码。JIT 很可能会影响第一次运行,使您的第一个方法变慢。

如果你改变测试的顺序(即首先测试非泛型方法),你的结果会相反吗?我怀疑是这样。当我将代码插入LINQPad中,然后复制它以便运行这两个测试两次时,第二次迭代的执行时间彼此相差不到几百个时钟周期。

所以,回答你的问题:是的,有人知道为什么。这是因为你的基准不准确!

  • @nirmus:这里没有装箱。对于引用类型,"box"指令实际上不执行任何操作,它会被 JIT 移除。 (7认同)