使用 BinaryPrimitives 填充字节缓冲区的首选方法?

Mr.*_*Boy 0 .net c#

我使用 System.Buffers.Binary.BinaryPrimitives 以精确的方式将值写入字节数组。由于微软没有提供示例,我能想到几种实现方式,但不确定其中哪一种更好。原则上,需要创建大量 Span<byte> 对象似乎并不理想?

考虑这个简单的例子:

// Serializes i1, i2, s1 and s2 (in that order, little-endian) into a newly
// allocated 16-byte array and returns it.
byte[] PopulateBuffer(int i1,int i2,Int16 s1,Int16 s2)
{
    // 16 bytes total: the four values occupy offsets 0-11; offsets 12-15 are
    // never written and stay zero (padding based on the external protocol).
    byte[] result = new byte[16];

    // Each write targets an exactly-sized window over the array.
    BinaryPrimitives.WriteInt32LittleEndian(result.AsSpan(0, 4), i1);
    BinaryPrimitives.WriteInt32LittleEndian(result.AsSpan(4, 4), i2);
    BinaryPrimitives.WriteInt16LittleEndian(result.AsSpan(8, 2), s1);
    BinaryPrimitives.WriteInt16LittleEndian(result.AsSpan(10, 2), s2);

    return result;
}
Run Code Online (Sandbox Code Playgroud)

我在这里实例化了 5 个 Span 对象。与通过位移手动提取字节的老式方法相比,这看起来有些繁琐,但实际开销真的很大吗?有没有更好的方式来使用这个类?

can*_*on7 5

TL;DR:从下面的结果来看,基于 Span 的方法似乎比替代方法快得多


请注意,Span<T> 是一种值类型,JIT 在优化它方面做得很好。

我创建了一个简化的测试:

using System;
using System.Buffers.Binary;
public class C
{
    /// <summary>
    /// Writes <paramref name="i1"/> (4 bytes) followed by <paramref name="s2"/>
    /// (2 bytes), both little-endian, into a new 6-byte array using
    /// <see cref="BinaryPrimitives"/>.
    /// </summary>
    /// <param name="i1">Value stored at offsets 0-3.</param>
    /// <param name="s2">Value stored at offsets 4-5.</param>
    /// <returns>The newly allocated 6-byte array.</returns>
    byte[] PopulateBufferSpan(int i1, short s2)
    {
        byte[] result = new byte[6];
        BinaryPrimitives.WriteInt32LittleEndian(result.AsSpan(0, 4), i1);
        BinaryPrimitives.WriteInt16LittleEndian(result.AsSpan(4, 2), s2);
        return result;
    }

    /// <summary>
    /// Produces the same 6-byte little-endian layout as
    /// <see cref="PopulateBufferSpan"/>, but extracts every byte by hand with
    /// shifts and masks.
    /// </summary>
    /// <param name="i1">Value stored at offsets 0-3.</param>
    /// <param name="s2">Value stored at offsets 4-5.</param>
    /// <returns>The newly allocated 6-byte array.</returns>
    byte[] PopulateBufferExplicit(int i1, short s2)
    {
        byte[] result = new byte[6];

        // Least significant byte first: byte k of the int lands at offset k.
        for (int k = 0; k < 4; k++)
        {
            result[k] = (byte)((i1 >> (8 * k)) & 0xFF);
        }

        // The short follows directly after the int, again low byte first.
        for (int k = 0; k < 2; k++)
        {
            result[4 + k] = (byte)((s2 >> (8 * k)) & 0xFF);
        }

        return result;
    }
}
Run Code Online (Sandbox Code Playgroud)

JIT 编译后的结果是:

C.PopulateBufferSpan(Int32, Int16)
    L0000: push rdi
    L0001: push rsi
    L0002: sub rsp, 0x28
    L0006: mov esi, edx
    L0008: mov edi, r8d
    L000b: mov rcx, 0x7ffec35e2360
    L0015: mov edx, 0x6
    L001a: call 0x7fff230847e0
    L001f: lea rdx, [rax+0x10]
    L0023: mov ecx, 0x6
    L0028: mov r8d, ecx
    L002b: cmp r8, 0x4
    L002f: jb L0051
    L0031: mov r8, rdx
    L0034: mov [r8], esi
    L0037: mov ecx, ecx
    L0039: cmp rcx, 0x6
    L003d: jb L0057
    L003f: add rdx, 0x4
    L0043: movsx rcx, di
    L0047: mov [rdx], cx
    L004a: add rsp, 0x28
    L004e: pop rsi
    L004f: pop rdi
    L0050: ret
    L0051: call System.ThrowHelper.ThrowArgumentOutOfRangeException()
    L0056: int3
    L0057: call System.ThrowHelper.ThrowArgumentOutOfRangeException()
    L005c: int3

C.PopulateBufferExplicit(Int32, Int16)
    L0000: push rdi
    L0001: push rsi
    L0002: sub rsp, 0x28
    L0006: mov esi, edx
    L0008: mov edi, r8d
    L000b: mov rcx, 0x7ffec35e2360
    L0015: mov edx, 0x6
    L001a: call 0x7fff230847e0
    L001f: mov [rax+0x10], sil
    L0023: mov edx, esi
    L0025: sar edx, 0x8
    L0028: mov [rax+0x11], dl
    L002b: mov edx, esi
    L002d: sar edx, 0x10
    L0030: mov [rax+0x12], dl
    L0033: sar esi, 0x18
    L0036: mov [rax+0x13], sil
    L003a: movsx rdx, di
    L003e: mov [rax+0x14], dl
    L0041: sar edx, 0x8
    L0044: mov [rax+0x15], dl
    L0047: add rsp, 0x28
    L004b: pop rsi
    L004c: pop rdi
    L004d: ret
Run Code Online (Sandbox Code Playgroud)

如您所见,两者的复杂度几乎没有区别,只是使用 BinaryPrimitives 的版本多了一些范围检查(这不是坏事)。

请注意,现在 JIT 是分层编译的,我认为 SharpLab 只显示第一层的结果,所以如果这段代码位于热路径上,结果可能还会进一步优化。

SharpLab 链接


我还使用 BenchmarkDotNet 运行了一个基准测试:

/// <summary>
/// BenchmarkDotNet benchmarks comparing <see cref="BinaryPrimitives"/> writes
/// through <see cref="Span{T}"/> slices against hand-written shift-and-mask
/// byte extraction, for both little-endian (LE) and big-endian (BE) layouts.
/// </summary>
public class MyBenchmark
{
    // Shared 32-byte destination reused by every benchmark; each writer only
    // touches offsets 0-11.
    private readonly byte[] buffer = new byte[32];

    /// <summary>Little-endian write via BinaryPrimitives.</summary>
    [Benchmark]
    public void PopulateBufferLESpan()
    {
        PopulateBufferLESpanImpl(1, 2, 3, 4);
    }

    /// <summary>Little-endian write via manual shifts and masks.</summary>
    [Benchmark]
    public void PopulateBufferLEExplicit()
    {
        PopulateBufferLEExplicitImpl(1, 2, 3, 4);
    }

    /// <summary>Big-endian write via BinaryPrimitives.</summary>
    [Benchmark]
    public void PopulateBufferBESpan()
    {
        PopulateBufferBESpanImpl(1, 2, 3, 4);
    }

    /// <summary>Big-endian write via manual shifts and masks.</summary>
    [Benchmark]
    public void PopulateBufferBEExplicit()
    {
        PopulateBufferBEExplicitImpl(1, 2, 3, 4);
    }

    /// <summary>
    /// Writes i1, i2 (4 bytes each) and s1, s2 (2 bytes each) to offsets 0-11
    /// of the shared buffer in little-endian order using BinaryPrimitives.
    /// </summary>
    private void PopulateBufferLESpanImpl(int i1, int i2, short s1, short s2)
    {
        var span = new Span<byte>(buffer);
        BinaryPrimitives.WriteInt32LittleEndian(span.Slice(0, 4), i1);
        BinaryPrimitives.WriteInt32LittleEndian(span.Slice(4, 4), i2);
        BinaryPrimitives.WriteInt16LittleEndian(span.Slice(8, 2), s1);
        BinaryPrimitives.WriteInt16LittleEndian(span.Slice(10, 2), s2);
    }

    /// <summary>
    /// Same layout as <see cref="PopulateBufferLESpanImpl"/>, written one byte
    /// at a time: least significant byte of each value first.
    /// </summary>
    private void PopulateBufferLEExplicitImpl(int i1, int i2, short s1, short s2)
    {
        buffer[0] = (byte)(i1 & 0xFF);
        buffer[1] = (byte)((i1 >> 8) & 0xFF);
        buffer[2] = (byte)((i1 >> 16) & 0xFF);
        buffer[3] = (byte)((i1 >> 24) & 0xFF);
        buffer[4] = (byte)(i2 & 0xFF);
        buffer[5] = (byte)((i2 >> 8) & 0xFF);
        buffer[6] = (byte)((i2 >> 16) & 0xFF);
        buffer[7] = (byte)((i2 >> 24) & 0xFF);
        buffer[8] = (byte)(s1 & 0xFF);
        buffer[9] = (byte)((s1 >> 8) & 0xFF);
        buffer[10] = (byte)(s2 & 0xFF);
        buffer[11] = (byte)((s2 >> 8) & 0xFF);
    }

    /// <summary>
    /// Writes i1, i2 (4 bytes each) and s1, s2 (2 bytes each) to offsets 0-11
    /// of the shared buffer in big-endian order using BinaryPrimitives.
    /// </summary>
    private void PopulateBufferBESpanImpl(int i1, int i2, short s1, short s2)
    {
        var span = new Span<byte>(buffer);
        BinaryPrimitives.WriteInt32BigEndian(span.Slice(0, 4), i1);
        BinaryPrimitives.WriteInt32BigEndian(span.Slice(4, 4), i2);
        BinaryPrimitives.WriteInt16BigEndian(span.Slice(8, 2), s1);
        BinaryPrimitives.WriteInt16BigEndian(span.Slice(10, 2), s2);
    }

    /// <summary>
    /// Same layout as <see cref="PopulateBufferBESpanImpl"/>, written one byte
    /// at a time: most significant byte of each value first.
    /// </summary>
    private void PopulateBufferBEExplicitImpl(int i1, int i2, short s1, short s2)
    {
        buffer[0] = (byte)((i1 >> 24) & 0xFF);
        buffer[1] = (byte)((i1 >> 16) & 0xFF);
        buffer[2] = (byte)((i1 >> 8) & 0xFF);
        buffer[3] = (byte)(i1 & 0xFF);
        buffer[4] = (byte)((i2 >> 24) & 0xFF);
        buffer[5] = (byte)((i2 >> 16) & 0xFF);
        // Third byte of i2 is bits 8-15; the original shifted by 24 again,
        // duplicating the top byte and diverging from the span-based writer.
        buffer[6] = (byte)((i2 >> 8) & 0xFF);
        buffer[7] = (byte)(i2 & 0xFF);
        buffer[8] = (byte)((s1 >> 8) & 0xFF);
        buffer[9] = (byte)(s1 & 0xFF);
        buffer[10] = (byte)((s2 >> 8) & 0xFF);
        buffer[11] = (byte)(s2 & 0xFF);
    }
}
Run Code Online (Sandbox Code Playgroud)

结果:

using System;
using System.Buffers.Binary;
public class C
{
    /// <summary>
    /// Writes <paramref name="i1"/> (4 bytes) followed by <paramref name="s2"/>
    /// (2 bytes), both little-endian, into a new 6-byte array using
    /// <see cref="BinaryPrimitives"/>.
    /// </summary>
    /// <param name="i1">Value stored at offsets 0-3.</param>
    /// <param name="s2">Value stored at offsets 4-5.</param>
    /// <returns>The newly allocated 6-byte array.</returns>
    byte[] PopulateBufferSpan(int i1, short s2)
    {
        byte[] result = new byte[6];
        BinaryPrimitives.WriteInt32LittleEndian(result.AsSpan(0, 4), i1);
        BinaryPrimitives.WriteInt16LittleEndian(result.AsSpan(4, 2), s2);
        return result;
    }

    /// <summary>
    /// Produces the same 6-byte little-endian layout as
    /// <see cref="PopulateBufferSpan"/>, but extracts every byte by hand with
    /// shifts and masks.
    /// </summary>
    /// <param name="i1">Value stored at offsets 0-3.</param>
    /// <param name="s2">Value stored at offsets 4-5.</param>
    /// <returns>The newly allocated 6-byte array.</returns>
    byte[] PopulateBufferExplicit(int i1, short s2)
    {
        byte[] result = new byte[6];

        // Least significant byte first: byte k of the int lands at offset k.
        for (int k = 0; k < 4; k++)
        {
            result[k] = (byte)((i1 >> (8 * k)) & 0xFF);
        }

        // The short follows directly after the int, again low byte first.
        for (int k = 0; k < 2; k++)
        {
            result[4 + k] = (byte)((s2 >> (8 * k)) & 0xFF);
        }

        return result;
    }
}
Run Code Online (Sandbox Code Playgroud)

也许令人惊讶的是,基于 Span 的方法比手动进行位操作要快得多。这可能是因为 x86 是小端的,BinaryPrimitives 知道可以将值直接写入数组,而无需逐个提取并赋值每个字节;不过 BE 变体也显示出相当显著的差异。