我使用 System.Buffers.Binary.BinaryPrimitives 以精确的方式将值写入字节数组。由于 MS 没有提供示例,我能想到几种实现方式,但不确定其中哪一种更好。原则上,需要创建大量 Span<byte> 对象似乎并不理想?
考虑这个简单的例子:
// Serializes i1, i2, s1, s2 (in that order, little-endian) into a fresh 16-byte buffer.
// Only the first 12 bytes carry data; the rest stays zero as padding.
byte[] PopulateBuffer(int i1,int i2,Int16 s1,Int16 s2)
{
    // Byte offsets of each field inside the message.
    const int FirstIntOffset = 0;
    const int SecondIntOffset = FirstIntOffset + sizeof(int);
    const int FirstShortOffset = SecondIntOffset + sizeof(int);
    const int SecondShortOffset = FirstShortOffset + sizeof(short);

    // 16 bytes rather than 12: the size is padded based on the external protocol.
    byte[] message = new byte[16];

    // Span<byte> is a struct view over the array; slicing it does not copy data.
    Span<byte> view = message;

    BinaryPrimitives.WriteInt32LittleEndian(view.Slice(FirstIntOffset, sizeof(int)), i1);
    BinaryPrimitives.WriteInt32LittleEndian(view.Slice(SecondIntOffset, sizeof(int)), i2);
    BinaryPrimitives.WriteInt16LittleEndian(view.Slice(FirstShortOffset, sizeof(short)), s1);
    BinaryPrimitives.WriteInt16LittleEndian(view.Slice(SecondShortOffset, sizeof(short)), s2);

    return message;
}
Run Code Online (Sandbox Code Playgroud)
我在这里实例化了 5 个 Span 对象。与通过移位手动提取字节的老式方法相比,这看起来有些繁琐,但实际开销真的很大吗?有没有更好的方式来使用这个类?
TL;DR:从下面的结果来看,基于 Span 的方法似乎比替代方法快得多。
请注意,Span<T> 是一种值类型,JIT 很擅长“看穿”它并进行优化。
我创建了一个简化的测试:
using System;
using System.Buffers.Binary;
/// <summary>
/// Two methods that build the same 6-byte layout (a 32-bit value followed by a
/// 16-bit value, both least-significant byte first) in two different ways.
/// </summary>
public class C
{
/// <summary>
/// Allocates a 6-byte array and fills it through <see cref="BinaryPrimitives"/>.
/// </summary>
/// <param name="i1">Value written little-endian into bytes 0-3.</param>
/// <param name="s2">Value written little-endian into bytes 4-5.</param>
/// <returns>The newly allocated 6-byte array.</returns>
byte[] PopulateBufferSpan(int i1, short s2)
{
var buffer = new byte[6];
// One span over the whole array; each Slice below is a sub-view of it.
var span = new Span<byte>(buffer);
BinaryPrimitives.WriteInt32LittleEndian(span.Slice(0,4), i1);
BinaryPrimitives.WriteInt16LittleEndian(span.Slice(4,2), s2);
return buffer;
}
/// <summary>
/// Allocates a 6-byte array and fills it one byte at a time with shifts and masks.
/// </summary>
/// <param name="i1">Value written into bytes 0-3, lowest byte first.</param>
/// <param name="s2">Value written into bytes 4-5, lowest byte first.</param>
/// <returns>The newly allocated 6-byte array.</returns>
byte[] PopulateBufferExplicit(int i1, short s2)
{
var buffer = new byte[6];
// Each line shifts the wanted byte into the low 8 bits and masks off the rest.
buffer[0] = (byte)(i1 & 0xFF);
buffer[1] = (byte)((i1 >> 8) & 0xFF);
buffer[2] = (byte)((i1 >> 16) & 0xFF);
buffer[3] = (byte)((i1 >> 24) & 0xFF);
buffer[4] = (byte)(s2 & 0xFF);
buffer[5] = (byte)((s2 >> 8) & 0xFF);
return buffer;
}
}
Run Code Online (Sandbox Code Playgroud)
它被 JIT 编译为:
C.PopulateBufferSpan(Int32, Int16)
L0000: push rdi
L0001: push rsi
L0002: sub rsp, 0x28
L0006: mov esi, edx
L0008: mov edi, r8d
L000b: mov rcx, 0x7ffec35e2360
L0015: mov edx, 0x6
L001a: call 0x7fff230847e0
L001f: lea rdx, [rax+0x10]
L0023: mov ecx, 0x6
L0028: mov r8d, ecx
L002b: cmp r8, 0x4
L002f: jb L0051
L0031: mov r8, rdx
L0034: mov [r8], esi
L0037: mov ecx, ecx
L0039: cmp rcx, 0x6
L003d: jb L0057
L003f: add rdx, 0x4
L0043: movsx rcx, di
L0047: mov [rdx], cx
L004a: add rsp, 0x28
L004e: pop rsi
L004f: pop rdi
L0050: ret
L0051: call System.ThrowHelper.ThrowArgumentOutOfRangeException()
L0056: int3
L0057: call System.ThrowHelper.ThrowArgumentOutOfRangeException()
L005c: int3
C.PopulateBufferExplicit(Int32, Int16)
L0000: push rdi
L0001: push rsi
L0002: sub rsp, 0x28
L0006: mov esi, edx
L0008: mov edi, r8d
L000b: mov rcx, 0x7ffec35e2360
L0015: mov edx, 0x6
L001a: call 0x7fff230847e0
L001f: mov [rax+0x10], sil
L0023: mov edx, esi
L0025: sar edx, 0x8
L0028: mov [rax+0x11], dl
L002b: mov edx, esi
L002d: sar edx, 0x10
L0030: mov [rax+0x12], dl
L0033: sar esi, 0x18
L0036: mov [rax+0x13], sil
L003a: movsx rdx, di
L003e: mov [rax+0x14], dl
L0041: sar edx, 0x8
L0044: mov [rax+0x15], dl
L0047: add rsp, 0x28
L004b: pop rsi
L004c: pop rdi
L004d: ret
Run Code Online (Sandbox Code Playgroud)
如您所见,两者的复杂度几乎没有区别,只是使用 BinaryPrimitives 的版本多了一些范围检查(这并不是坏事)。
请注意,现在 JIT 采用分层编译,我认为 SharpLab 只显示第一层的结果,因此如果这段代码位于热路径上,生成的代码可能会进一步改进。
我还使用 BenchmarkDotNet 运行了一个基准测试:
public class MyBenchmark
{
private byte[] buffer = new byte[32];
/// <summary>
/// Benchmark entry point: runs the span-based little-endian writer with fixed arguments.
/// </summary>
[Benchmark]
public void PopulateBufferLESpan() => PopulateBufferLESpanImpl(1, 2, 3, 4);
/// <summary>
/// Benchmark entry point: runs the shift-and-mask little-endian writer with fixed arguments.
/// </summary>
[Benchmark]
public void PopulateBufferLEExplicit() => PopulateBufferLEExplicitImpl(1, 2, 3, 4);
/// <summary>
/// Benchmark entry point: runs the span-based big-endian writer with fixed arguments.
/// </summary>
[Benchmark]
public void PopulateBufferBESpan() => PopulateBufferBESpanImpl(1, 2, 3, 4);
/// <summary>
/// Benchmark entry point: runs the shift-and-mask big-endian writer with fixed arguments.
/// </summary>
[Benchmark]
public void PopulateBufferBEExplicit() => PopulateBufferBEExplicitImpl(1, 2, 3, 4);
/// <summary>
/// Writes the four values little-endian into the first 12 bytes of the
/// <c>buffer</c> field using <see cref="BinaryPrimitives"/>.
/// </summary>
/// <param name="i1">Written to bytes 0-3.</param>
/// <param name="i2">Written to bytes 4-7.</param>
/// <param name="s1">Written to bytes 8-9.</param>
/// <param name="s2">Written to bytes 10-11.</param>
private void PopulateBufferLESpanImpl(int i1, int i2, short s1, short s2)
{
    // Byte offsets of each field inside the buffer.
    const int FirstIntOffset = 0;
    const int SecondIntOffset = 4;
    const int FirstShortOffset = 8;
    const int SecondShortOffset = 10;

    // Implicit array-to-span conversion; no data is copied.
    Span<byte> view = buffer;

    BinaryPrimitives.WriteInt32LittleEndian(view.Slice(FirstIntOffset, sizeof(int)), i1);
    BinaryPrimitives.WriteInt32LittleEndian(view.Slice(SecondIntOffset, sizeof(int)), i2);
    BinaryPrimitives.WriteInt16LittleEndian(view.Slice(FirstShortOffset, sizeof(short)), s1);
    BinaryPrimitives.WriteInt16LittleEndian(view.Slice(SecondShortOffset, sizeof(short)), s2);
}
/// <summary>
/// Writes the four values little-endian into the first 12 bytes of the
/// <c>buffer</c> field, one byte at a time, using shifts and truncating casts.
/// </summary>
/// <param name="i1">Written to bytes 0-3, lowest byte first.</param>
/// <param name="i2">Written to bytes 4-7, lowest byte first.</param>
/// <param name="i3">Written to bytes 8-9, lowest byte first.</param>
/// <param name="i4">Written to bytes 10-11, lowest byte first.</param>
private void PopulateBufferLEExplicitImpl(int i1, int i2, short i3, short i4)
{
    // A truncating cast keeps only the low 8 bits, exactly like masking with 0xFF.
    // The unchecked block makes that hold even if the project compiles with overflow checks.
    unchecked
    {
        // i1 -> bytes 0..3
        buffer[0] = (byte)i1;
        buffer[1] = (byte)(i1 >> 8);
        buffer[2] = (byte)(i1 >> 16);
        buffer[3] = (byte)(i1 >> 24);

        // i2 -> bytes 4..7
        buffer[4] = (byte)i2;
        buffer[5] = (byte)(i2 >> 8);
        buffer[6] = (byte)(i2 >> 16);
        buffer[7] = (byte)(i2 >> 24);

        // i3 -> bytes 8..9
        buffer[8] = (byte)i3;
        buffer[9] = (byte)(i3 >> 8);

        // i4 -> bytes 10..11
        buffer[10] = (byte)i4;
        buffer[11] = (byte)(i4 >> 8);
    }
}
/// <summary>
/// Writes the four values big-endian into the first 12 bytes of the
/// <c>buffer</c> field using <see cref="BinaryPrimitives"/>.
/// </summary>
/// <param name="i1">Written to bytes 0-3.</param>
/// <param name="i2">Written to bytes 4-7.</param>
/// <param name="s1">Written to bytes 8-9.</param>
/// <param name="s2">Written to bytes 10-11.</param>
private void PopulateBufferBESpanImpl(int i1, int i2, short s1, short s2)
{
    // Byte offsets of each field inside the buffer.
    const int FirstIntOffset = 0;
    const int SecondIntOffset = 4;
    const int FirstShortOffset = 8;
    const int SecondShortOffset = 10;

    // Implicit array-to-span conversion; no data is copied.
    Span<byte> view = buffer;

    BinaryPrimitives.WriteInt32BigEndian(view.Slice(FirstIntOffset, sizeof(int)), i1);
    BinaryPrimitives.WriteInt32BigEndian(view.Slice(SecondIntOffset, sizeof(int)), i2);
    BinaryPrimitives.WriteInt16BigEndian(view.Slice(FirstShortOffset, sizeof(short)), s1);
    BinaryPrimitives.WriteInt16BigEndian(view.Slice(SecondShortOffset, sizeof(short)), s2);
}
/// <summary>
/// Writes the four values big-endian (most significant byte first) into the
/// first 12 bytes of the <c>buffer</c> field, one byte at a time, using shifts
/// and masks. Intended to produce the same bytes as the
/// <see cref="BinaryPrimitives"/>-based big-endian writer.
/// </summary>
/// <param name="i1">Written to bytes 0-3, highest byte first.</param>
/// <param name="i2">Written to bytes 4-7, highest byte first.</param>
/// <param name="i3">Written to bytes 8-9, highest byte first.</param>
/// <param name="i4">Written to bytes 10-11, highest byte first.</param>
private void PopulateBufferBEExplicitImpl(int i1, int i2, short i3, short i4)
{
    // i1 -> bytes 0..3
    buffer[0] = (byte)((i1 >> 24) & 0xFF);
    buffer[1] = (byte)((i1 >> 16) & 0xFF);
    buffer[2] = (byte)((i1 >> 8) & 0xFF);
    buffer[3] = (byte)(i1 & 0xFF);

    // i2 -> bytes 4..7
    buffer[4] = (byte)((i2 >> 24) & 0xFF);
    buffer[5] = (byte)((i2 >> 16) & 0xFF);
    // Fixed: this previously shifted by 24, which duplicated the top byte here
    // instead of writing bits 8-15.
    buffer[6] = (byte)((i2 >> 8) & 0xFF);
    buffer[7] = (byte)(i2 & 0xFF);

    // i3 -> bytes 8..9
    buffer[8] = (byte)((i3 >> 8) & 0xFF);
    buffer[9] = (byte)(i3 & 0xFF);

    // i4 -> bytes 10..11
    buffer[10] = (byte)((i4 >> 8) & 0xFF);
    buffer[11] = (byte)(i4 & 0xFF);
}
Run Code Online (Sandbox Code Playgroud)
结果:
using System;
using System.Buffers.Binary;
/// <summary>
/// Two ways of producing the same 6-byte little-endian layout:
/// a 32-bit value in bytes 0-3 followed by a 16-bit value in bytes 4-5.
/// </summary>
public class C
{
    /// <summary>
    /// Builds the 6-byte layout with <see cref="BinaryPrimitives"/> writing into span slices.
    /// </summary>
    /// <param name="i1">Value stored little-endian in bytes 0-3.</param>
    /// <param name="s2">Value stored little-endian in bytes 4-5.</param>
    /// <returns>A newly allocated 6-byte array.</returns>
    byte[] PopulateBufferSpan(int i1, short s2)
    {
        byte[] result = new byte[6];

        // Implicit array-to-span conversion; slices are views, not copies.
        Span<byte> view = result;
        Span<byte> intPart = view.Slice(0, sizeof(int));
        Span<byte> shortPart = view.Slice(sizeof(int), sizeof(short));

        BinaryPrimitives.WriteInt32LittleEndian(intPart, i1);
        BinaryPrimitives.WriteInt16LittleEndian(shortPart, s2);

        return result;
    }

    /// <summary>
    /// Builds the 6-byte layout by extracting each byte with shifts and truncating casts.
    /// </summary>
    /// <param name="i1">Value stored in bytes 0-3, lowest byte first.</param>
    /// <param name="s2">Value stored in bytes 4-5, lowest byte first.</param>
    /// <returns>A newly allocated 6-byte array.</returns>
    byte[] PopulateBufferExplicit(int i1, short s2)
    {
        // A truncating cast keeps only the low 8 bits, exactly like masking with 0xFF.
        unchecked
        {
            return new byte[]
            {
                (byte)i1,
                (byte)(i1 >> 8),
                (byte)(i1 >> 16),
                (byte)(i1 >> 24),
                (byte)s2,
                (byte)(s2 >> 8),
            };
        }
    }
}
Run Code Online (Sandbox Code Playgroud)
也许令人惊讶的是,基于 Span 的方法比手动位操作快得多。这可能是因为 x86 是小端的,BinaryPrimitives 知道可以将值直接写入数组,而无需逐个提取并赋值每个字节;但 BE 变体也显示出相当显著的差异。
| 归档时间: |
|
| 查看次数: |
910 次 |
| 最近记录: |