Phi*_*ght 4 .net c# multithreading ilspy
我正在学习编写并发数据结构并将ConcurrentStack实现视为学习练习.作为起点,我使用IlSpy将其反编译为C#,创建了ConcurrentStack实现的副本.我仅限于调查和使用Push和TryPop方法.
但是我的实现速度明显慢于使用原始版本.
我的测试使用4个线程(在单个套接字上,4个核心CPU),每个线程对不同的核心具有线程亲和性.每个线程执行1,000,000个循环,每个循环执行三次推送和三次弹出.运行测试多次完成所有线程的平均时间是......
因此,即使代码,据我所知,两者之间的代码相同,但克隆的速度要慢50%左右.我在一次运行中运行了500次测试并取了所有运行的平均值.所以我不相信这个问题是代码的初始JIT.
任何想法为什么这些方法的副本会慢得多?
C#实现
(为了完整起见,我提供了可用于复制结果的C#控制台应用程序代码.对于任何有趣的人来说,看看他们是否得到与我相同的结果.)
class Program
{
static void Main(string[] args)
{
int processors = Environment.ProcessorCount;
Console.WriteLine("Processors: {0}", processors);
List<Type> runnersT = new List<Type>() { typeof(ThreadRunnerConcurrent),
typeof(ThreadRunnerCASStack)};
int cycles = 500;
foreach (Type runnerT in runnersT)
{
long total = 0;
for (int i = 0; i < cycles; i++)
{
// Create a thread runner per processor
List<ThreadRunner> runners = new List<ThreadRunner>();
for (int j = 0; j < processors; j++)
{
ThreadRunner runner = Activator.CreateInstance(runnerT) as ThreadRunner;
runner.Processor = j;
runners.Add(runner);
}
// Start each runner going
Stopwatch sw = new Stopwatch();
sw.Start();
runners.ForEach((r) => r.Start());
// Wait for all the runners to exit
runners.ForEach((r) => r.Join());
runners.ForEach((r) => r.Check());
sw.Stop();
total += sw.ElapsedMilliseconds;
}
Console.WriteLine("{0} Average: {1}ms", runnerT.Name, (total / cycles));
}
Console.WriteLine("Finished");
Console.ReadLine();
}
}
abstract class ThreadRunner
{
private int _processor;
private Thread _thread;
public ThreadRunner()
{
}
public int Processor
{
get { return _processor; }
set { _processor = value; }
}
public void Start()
{
_thread = new Thread(new ParameterizedThreadStart(Run));
_thread.Start();
}
public void Join()
{
_thread.Join();
}
public abstract void Check();
protected abstract void Run(int cycles);
private void Run(object param)
{
SetAffinity();
Run(1000000);
}
private void SetAffinity()
{
#pragma warning disable 618
int osThreadId = AppDomain.GetCurrentThreadId();
#pragma warning restore 618
// Set the thread's processor affinity
ProcessThread thread = Process.GetCurrentProcess().Threads.Cast<ProcessThread>().Where(t => t.Id == osThreadId).Single();
thread.ProcessorAffinity = new IntPtr(1L << Processor);
}
}
class ThreadRunnerConcurrent : ThreadRunner
{
private static ConcurrentStack<int> _stack = new ConcurrentStack<int>();
protected override void Run(int cycles)
{
int ret;
for (int i = 0; i < cycles; i++)
{
_stack.Push(i);
_stack.Push(i);
while (!_stack.TryPop(out ret)) ;
_stack.Push(i);
while (!_stack.TryPop(out ret)) ;
while (!_stack.TryPop(out ret)) ;
}
}
public override void Check()
{
if (_stack.Count > 0)
Console.WriteLine("ThreadRunnerConcurrent has entries!");
}
}
class ThreadRunnerCASStack : ThreadRunner
{
private static CASStack<int> _stack = new CASStack<int>();
protected override void Run(int cycles)
{
int ret;
for (int i = 0; i < cycles; i++)
{
_stack.Push(i);
_stack.Push(i);
while (!_stack.TryPop(out ret)) ;
_stack.Push(i);
while (!_stack.TryPop(out ret)) ;
while (!_stack.TryPop(out ret)) ;
}
}
public override void Check()
{
if (_stack.Count > 0)
Console.WriteLine("ThreadRunnerCASStack has entries!");
}
}
class CASStack<T>
{
private class Node
{
internal readonly T m_value;
internal CASStack<T>.Node m_next;
internal Node(T value)
{
this.m_value = value;
this.m_next = null;
}
}
private volatile CASStack<T>.Node m_head;
public void Push(T item)
{
CASStack<T>.Node node = new CASStack<T>.Node(item);
node.m_next = this.m_head;
if (Interlocked.CompareExchange<CASStack<T>.Node>(ref this.m_head, node, node.m_next) == node.m_next)
return;
PushCore(node, node);
}
private void PushCore(Node head, Node tail)
{
SpinWait spinWait = default(SpinWait);
do
{
spinWait.SpinOnce();
tail.m_next = this.m_head;
}
while (Interlocked.CompareExchange<CASStack<T>.Node>(ref this.m_head, head, tail.m_next) != tail.m_next);
}
public bool TryPop(out T result)
{
CASStack<T>.Node head = this.m_head;
if (head == null)
{
result = default(T);
return false;
}
if (Interlocked.CompareExchange<CASStack<T>.Node>(ref this.m_head, head.m_next, head) == head)
{
result = head.m_value;
return true;
}
return TryPopCore(out result);
}
private bool TryPopCore(out T result)
{
CASStack<T>.Node node;
if (TryPopCore(1, out node) == 1)
{
result = node.m_value;
return true;
}
result = default(T);
return false;
}
private int TryPopCore(int count, out CASStack<T>.Node poppedHead)
{
SpinWait spinWait = default(SpinWait);
int num = 1;
Random random = new Random(Environment.TickCount & 2147483647);
CASStack<T>.Node head;
int num2;
while (true)
{
head = this.m_head;
if (head == null)
break;
CASStack<T>.Node node = head;
num2 = 1;
while (num2 < count && node.m_next != null)
{
node = node.m_next;
num2++;
}
if (Interlocked.CompareExchange<CASStack<T>.Node>(ref this.m_head, node.m_next, head) == head)
goto Block_5;
for (int i = 0; i < num; i++)
spinWait.SpinOnce();
num = (spinWait.NextSpinWillYield ? random.Next(1, 8) : (num * 2));
}
poppedHead = null;
return 0;
Block_5:
poppedHead = head;
return num2;
}
}
#endregion
Run Code Online (Sandbox Code Playgroud)
ConcurrentStack<T>CASStack<T>虽然两者的代码完全相同,但有一个优势就是你没有.
ConcurrentStack<T>您的计算机上安装了一个ngen'd本机映像,该映像是在安装.Net框架安装时编译的.您CASStack<T>正在通过JIT进行编译,并且由于JIT必须快速,因此它不会像ngen中的AOT编译器那样执行任何优化.
我在计算机上测试了你的代码.没有你的形象,我得到了这些结果:
Processors: 4
ThreadRunnerConcurrent Average: 764ms
ThreadRunnerCASStack Average: 948ms
Finished
Run Code Online (Sandbox Code Playgroud)
在ngening之后:
Processors: 4
ThreadRunnerConcurrent Average: 778ms
ThreadRunnerCASStack Average: 742ms
Finished
Run Code Online (Sandbox Code Playgroud)
| 归档时间: |
|
| 查看次数: |
194 次 |
| 最近记录: |