Ale*_*aev 3 c# mono simd vectorization monodevelop
基准代码:
using System;
using System.Collections;
using System.Collections.Generic;
using System.Diagnostics;
using System.Linq;
using Mono.Simd;
using MathNet.Numerics.LinearAlgebra.Single;
namespace XXX {
/// <summary>
/// Unit-conversion helpers for <see cref="TimeSpan"/> values.
/// </summary>
public static class TimeSpanExtensions {
    // How many nanoseconds make up one millisecond.
    private const double NanosecondsPerMillisecond = 1000000.0;

    /// <summary>
    /// Expresses the whole interval as a (possibly fractional) number of nanoseconds.
    /// </summary>
    /// <param name="timeSpan">Interval to convert.</param>
    /// <returns>
    /// The interval's <c>TotalMilliseconds</c> multiplied by one million.
    /// </returns>
    public static double TotalNanoseconds(this TimeSpan timeSpan) {
        double milliseconds = timeSpan.TotalMilliseconds;
        return milliseconds * NanosecondsPerMillisecond;
    }
}
/// <summary>
/// Benchmark whose measured operation is a single addition of two
/// Mono.Simd <c>Vector4f</c> values.
/// </summary>
public sealed class SimdBenchmark : Benchmark {
    // Both operands are initialised to (1, 2, 3, 4).
    private Vector4f _left = new Vector4f(1.0f, 2.0f, 3.0f, 4.0f);
    private Vector4f _right = new Vector4f(1.0f, 2.0f, 3.0f, 4.0f);

    // Receives the result of every addition.
    private Vector4f _sum;

    /// <summary>
    /// Adds the two operand vectors and stores the result in a field.
    /// </summary>
    public override void Do() {
        _sum = _left + _right;
    }
}
/// <summary>
/// Benchmark whose measured operation is a single addition of two
/// four-element MathNet <c>DenseVector</c> instances.
/// </summary>
public sealed class MathNetBenchmark : Benchmark {
    // Both operands are initialised to (1, 2, 3, 4).
    private DenseVector _left = new DenseVector(new[] { 1.0f, 2.0f, 3.0f, 4.0f });
    private DenseVector _right = new DenseVector(new[] { 1.0f, 2.0f, 3.0f, 4.0f });

    // Receives the result of every addition.
    private DenseVector _sum;

    /// <summary>
    /// Adds the two operand vectors and stores the result in a field.
    /// </summary>
    public override void Do() {
        _sum = _left + _right;
    }
}
/// <summary>
/// Benchmark whose measured operation is a single addition of two values of
/// the hand-written <c>Vector4</c> struct declared in this namespace.
/// </summary>
public sealed class DefaultBenchmark : Benchmark {
    // Both operands are initialised to (1, 2, 3, 4).
    private Vector4 _left = new Vector4(1.0f, 2.0f, 3.0f, 4.0f);
    private Vector4 _right = new Vector4(1.0f, 2.0f, 3.0f, 4.0f);

    // Receives the result of every addition.
    private Vector4 _sum;

    /// <summary>
    /// Adds the two operand vectors and stores the result in a field.
    /// </summary>
    public override void Do() {
        _sum = _left + _right;
    }
}
/// <summary>
/// Benchmark whose measured operation is a single scalar <c>float</c> addition.
/// </summary>
public sealed class SimpleBenchmark : Benchmark {
    // Scalar operands: 1 and 2.
    private float _left = 1.0f;
    private float _right = 2.0f;

    // Receives the result of every addition.
    private float _sum;

    /// <summary>
    /// Adds the two scalar operands and stores the result in a field.
    /// </summary>
    public override void Do() {
        _sum = _left + _right;
    }
}
/// <summary>
/// Benchmark whose measured operation is an arbitrary caller-supplied delegate.
/// </summary>
public sealed class DelegateBenchmark : Benchmark {
    private readonly Action _action;

    /// <summary>
    /// Creates a benchmark that invokes <paramref name="action"/> once per sample.
    /// </summary>
    /// <param name="action">Operation to measure; must not be null.</param>
    /// <exception cref="ArgumentNullException">
    /// <paramref name="action"/> is null.
    /// </exception>
    public DelegateBenchmark(Action action) {
        // Reject null here: otherwise the failure only shows up later, as a
        // NullReferenceException from Do() once enumeration starts.
        if (action == null) {
            throw new ArgumentNullException("action");
        }
        _action = action;
    }

    /// <summary>
    /// Invokes the delegate passed to the constructor.
    /// </summary>
    public override void Do() {
        _action();
    }
}
/// <summary>
/// Base class for micro-benchmarks. Enumerating an instance yields an endless
/// sequence of timings, one per call to <see cref="Do"/>; callers are expected
/// to bound it themselves (for example with <c>Take</c>).
/// </summary>
public abstract class Benchmark : IEnumerable<TimeSpan> {
    /// <summary>
    /// Returns an iterator that repeatedly times <see cref="Do"/>.
    /// </summary>
    /// <remarks>
    /// This is an iterator method, so the warm-up call and the garbage
    /// collection run when the first element is requested, and they run again
    /// for every new enumeration.
    /// </remarks>
    /// <returns>
    /// A never-ending sequence of elapsed times, each covering one <see cref="Do"/> call.
    /// </returns>
    public IEnumerator<TimeSpan> GetEnumerator() {
        Do(); // Warm-up: the first call is never reported.
        GC.Collect(); // Collect garbage.
        GC.WaitForPendingFinalizers(); // Wait until finalizers finish.
        // Finalizable objects are only reclaimed by a collection that happens
        // after their finalizers have run, so collect once more before timing.
        GC.Collect();
        var stopwatch = new Stopwatch();
        while (true) {
            stopwatch.Reset();
            stopwatch.Start();
            Do();
            stopwatch.Stop();
            yield return stopwatch.Elapsed;
        }
    }

    /// <summary>
    /// Non-generic enumerator; forwards to the generic <see cref="GetEnumerator"/>.
    /// </summary>
    IEnumerator IEnumerable.GetEnumerator() {
        return GetEnumerator();
    }

    /// <summary>
    /// The operation being measured; called once per reported sample.
    /// </summary>
    public abstract void Do();
}
/// <summary>
/// Plain four-component single-precision vector with component-wise addition.
/// </summary>
public struct Vector4 {
    private float _x;
    private float _y;
    private float _z;
    private float _w;

    /// <summary>
    /// Creates a vector from its four components.
    /// </summary>
    /// <param name="x">First component.</param>
    /// <param name="y">Second component.</param>
    /// <param name="z">Third component.</param>
    /// <param name="w">Fourth component.</param>
    public Vector4(float x, float y, float z, float w) {
        _x = x;
        _y = y;
        _z = z;
        _w = w;
    }

    /// <summary>
    /// Adds two vectors component by component.
    /// </summary>
    /// <param name="v1">Left operand.</param>
    /// <param name="v2">Right operand.</param>
    /// <returns>A new vector holding the four component sums.</returns>
    public static Vector4 operator +(Vector4 v1, Vector4 v2) {
        float sumX = v1._x + v2._x;
        float sumY = v1._y + v2._y;
        float sumZ = v1._z + v2._z;
        float sumW = v1._w + v2._w;
        return new Vector4(sumX, sumY, sumZ, sumW);
    }
}
/// <summary>
/// Console entry point: runs each benchmark and prints its average sample time.
/// </summary>
class MainClass {
    /// <summary>
    /// Takes 1000 samples from the SIMD, scalar, hand-written-struct and MathNet
    /// benchmarks (in that order), then prints the four averages in nanoseconds,
    /// one per line.
    /// </summary>
    /// <param name="args">Command-line arguments; not used.</param>
    public static void Main(string[] args) {
        const int sampleCount = 1000;
        Func<TimeSpan, double> toNanoseconds = sample => sample.TotalNanoseconds();

        // All four measurements are taken before anything is written out.
        double simdAverage = new SimdBenchmark().Take(sampleCount).Average(toNanoseconds);
        double scalarAverage = new SimpleBenchmark().Take(sampleCount).Average(toNanoseconds);
        double structAverage = new DefaultBenchmark().Take(sampleCount).Average(toNanoseconds);
        double mathNetAverage = new MathNetBenchmark().Take(sampleCount).Average(toNanoseconds);

        double[] averages = { simdAverage, scalarAverage, structAverage, mathNetAverage };
        foreach (double average in averages) {
            Console.WriteLine(average + " ns");
        }
    }
}
}
Run Code Online (Sandbox Code Playgroud)
环境设置:
Windows 7/Mono 2.10.8/MonoDevelop 2.8.5
MonoDevelop设置:
结果:
这些是我的结果:
1608.8 ns
1554.9 ns
1582.5 ns
Run Code Online (Sandbox Code Playgroud)
(其中不包含 MathNET 的结果,不过这在这里并不重要。)操作系统是 Ubuntu 10.10(32 位),Mono 版本为 2.10.7。此时您也许会考虑针对 Windows 版 Mono 提交一份错误报告。但是:
我认为由于基准测试的机制开销,这不是对SIMD操作进行基准测试的正确方法.
例如,请看下面这个基于您的 Vector4 类的简单测试。
// Times `count` accumulating additions, first with Mono.Simd's Vector4f and
// then with the hand-written Vector4 struct, and prints the elapsed time and
// the final accumulated value for each.
const int count = 100000;

// --- SIMD variant ---
var simdVector = new Vector4f(1, 2, 3, 4);
var simdResult = simdVector;
var sw = Stopwatch.StartNew();
for(var i = 0; i < count; i++)
{
    simdResult += simdVector;
}
sw.Stop();
Console.WriteLine("SIMD result: {0} {1}", sw.Elapsed, simdResult);

// --- Plain struct variant ---
// The operands are created before the stopwatch starts, exactly as in the
// SIMD variant, so both timed regions contain only the loop.
var usualVector = new Vector4(1, 2, 3, 4);
var usualResult = usualVector;
sw = Stopwatch.StartNew();
for(var i = 0; i < count; i++)
{
    usualResult += usualVector;
}
sw.Stop();
// NOTE(review): printing the components here presumably relies on a ToString
// override on Vector4 - confirm against the Vector4 actually in use.
Console.WriteLine("Usual result: {0} {1}", sw.Elapsed, usualResult);
Run Code Online (Sandbox Code Playgroud)
在我的机器上的结果是:
SIMD result: 00:00:00.0005802 <100001, 200002, 300003, 400004>
Usual result: 00:00:00.0029598 <100001, 200002, 300003, 400004>
Run Code Online (Sandbox Code Playgroud)
所以这肯定与您的测试结果不同。由此您可能会认为 SIMD 操作更快——但基准测试并没有那么简单。在这种配置下,上面那个循环更快的原因可能有很多,这些原因可以另找机会讨论。
不过可以肯定的是,SIMD 指令比连续执行几次普通加法更快。您应该检查的是这些指令是否真的被生成了。
在 Linux 上,可以使用 mono -v -v 来检查生成的汇编代码(指目标处理器的汇编代码,而不是 Mono 程序集 ;))。然而,我不确定它是否适用于普通的 Windows 系统,因为它可能会用到 GCC 的 disas(使用 cygwin 也许运气会更好)。通过阅读这样的汇编代码,您可以检查 SIMD 指令是否真的被生成。
例如,检查为上面粘贴的程序生成的汇编代码,可以发现它在 SIMD 循环中使用了 addps 指令,这正是我们要找的。
哦,为了完整起见,下面是禁用 SIMD 时的输出:
$ mono --optimize=-simd SimdTest.exe
SIMD result: 00:00:00.0027111 <100001, 200002, 300003, 400004>
Usual result: 00:00:00.0026127 <100001, 200002, 300003, 400004>
Run Code Online (Sandbox Code Playgroud)
这些数字本身不如生成的汇编代码重要——此时生成的汇编代码中不包含 SIMD 指令。
希望这有用.
| 归档时间: |
|
| 查看次数: |
2151 次 |
| 最近记录: |