在 C# 中使用 span 替换小字符串中出现的最快方法

Ric*_*rdo 6 c# performance .net-core asp.net-core

我正在尝试最大限度地提高 C# 中 string.Replace 方法的 CPU 性能和内存优化。目标是减少内存分配和 CPU 时间,因为该项目位于 10000 rps 的 ASP.NET Core 中。

我发现了两个提高性能的技巧:1)使用Span Struct 2)使用String.Create

   internal struct ContextData
    {
        public string Origin { get; set; }
        public string Replace { get; set; }
        public string With { get; set; }
    }




    internal string SpanReplaceWithCreate(ContextData context)
    {
        int count = 0;
     
        ReadOnlySpan<char> origin_span = context.Origin.AsSpan();
        ReadOnlySpan<char> replace_span = context.Replace.AsSpan();
        ReadOnlySpan<char> replace_with = context.With.AsSpan();

        int index;
        ReadOnlySpan<char> tmp = origin_span;

        while ((index = tmp.IndexOf(replace_span)) > 0)
        {
            count++;
            tmp = tmp.Slice(index + replace_span.Length, tmp.Length - replace_span.Length - index);
        }

        string a = string.Create(context.Origin.Length + (context.Replace.Length - context.With.Length) * count, context, (chars, state) =>
           {
               // NOTE: We don't access the context variable in this delegate since 
               // it would cause a closure and allocation.
               // Instead we access the state parameter.

               // will track our position within the string data we are populating
               var position = 0;
               ReadOnlySpan<char> origin = state.Origin.AsSpan();
               ReadOnlySpan<char> replace = state.Replace.AsSpan();
               ReadOnlySpan<char> with = state.With.AsSpan();

               ReadOnlySpan<char> tmp_context = origin;

               while ((index = tmp_context.IndexOf(replace)) > 0)
               {
                   tmp_context.Slice(0, index).CopyTo(chars.Slice(position));
                   with.CopyTo(chars.Slice(position + index));
                   position += (index + with.Length);
                   tmp_context = tmp_context.Slice(index + replace.Length, tmp_context.Length - replace.Length - index);
               }

               if (position < chars.Length) {
                   tmp_context.CopyTo(chars.Slice(position));
               }

           });


        return a;
    }
Run Code Online (Sandbox Code Playgroud)

但与字符串相比,我的性能仍然最差。替换

方法 网址 寻找 代替 意思是 错误 标准差 中位数 第0代 第一代 第2代 已分配
字符串替换 http(...) 眼神交流 [196] 谷歌 AFD 370.4纳秒 9.37纳秒 27.33纳秒 360.7纳秒 1 0.0319 - - 336 乙
字符串替换为创建 http(...) 眼神交流 [196] 谷歌 AFD 492.8纳秒 9.60纳秒 12.15纳秒 490.4纳秒 2 0.0563 - - 592 乙

有什么建议吗?

这里是测试参数

https://www.example.com/xxxxx?campaign={camp}&adgroup={publisher_id}&install_callback=https%3A%2F%2Fpostback.example.com%3Ftransaction%3D{transaction}&session_callback=https%3A%2F%2Fpostback.example.com%3Ftransaction%3D{aff_sub1}&affsite={aff_site}&clickid={transaction}&adset_id={creative_id}&user_agent={ua}&ip={ip}&language={lang}



{camp} : "campiagn_it_banner_size_360"
{publisher_id} : "78983"
{transaction} : "c1032072-f815-413b-a57c-4a027f681e60"
{aff_sub1} : "78bea32a-6ead-4ea0-b9f2-9489ebc43d6a"
{aff_site} : "vbvsdgdavhdgdvjs_46_789-p90"
{creative_id} : "360x360"
{ua} : "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/90.0.4430.93 Safari/537.36"
{ip} : "192.168.1.1"
{lang} : "en"
Run Code Online (Sandbox Code Playgroud)

更新1

[Benchmark]
    public string FastTokenReplace()
    {

        string request = "http://wwww.example.com?a=campiagn_it_banner_size_360&b=78983&h=c1032072-f815-413b-a57c-4a027f681e6&y=78bea32a-6ead-4ea0-b9f2-9489ebc43d6a&ty=vbvsdgdavhdgdvjs_46_789-p90&yhhh=360x360&sua=Mozilla%2F5.0%20(Windows%20NT%2010.0%3B%20Win64%3B%20x64)%20AppleWebKit%2F537.36%20(KHTML%2C%20like%20Gecko)%20Chrome%2F90.0.4430.93%20Safari%2F537.36&ppp=192.168.1.1";
        string redirecturl = "https://www.example.com/xxxxx?campaign={camp}&adgroup={publisher_id}&install_callback=https%3A%2F%2Fpostback.example.com%3Ftransaction%3D{transaction}&session_callback=https%3A%2F%2Fpostback.example.com%3Ftransaction%3D{aff_sub1}&affsite={aff_site}&clickid={transaction}&adset_id={creative_id}&user_agent={ua}&ip={ip}&language={lang}&ieruiero{343454";

        int max_allocation = Math.Max(request.Length, redirecturl.Length) * 3;

        return string.Create(max_allocation, redirecturl, (chars, state) =>
        {
            ReadOnlySpan<char> tmp = state.AsSpan();
            int position = 0;
            int placeholder_start;
            while ((placeholder_start = tmp.IndexOf('{')) > 0)
            {
                int placeholder_end = tmp.Slice(placeholder_start).IndexOf('}');
                if (placeholder_end < 0)
                {
                    //copy the last part
                    tmp.CopyTo(chars.Slice(position));
                    break;
                }
                else
                {
                    tmp.Slice(0, placeholder_start).CopyTo(chars.Slice(position));
                    ReadOnlySpan<char> replace = tmp.Slice(placeholder_start, placeholder_end + 1);

                    //OPTIMIZE HERE?
                    ReadOnlySpan<char> with = Placeholders.getVal(replace.ToString()).AsSpan();

                    with.CopyTo(chars.Slice(position + placeholder_start));
                    position += (placeholder_start + with.Length);
                    tmp = tmp.Slice(placeholder_start + replace.Length, tmp.Length - replace.Length - placeholder_start);
                }

            }

        });
    }

 class Placeholders
{



    public const string camp = "{camp}";
    public const string publisher_id = "{publisher_id}";
    public const string creative_id = "{creative_id}";
    public const string ua = "{ua}";
    public const string lang = "{lang}";
    public const string ip = "{ip}";
    public const string Transaction = "{transaction}";
    public const string AffSite = "{aff_site}";
    public const string AdsetId = "{adset}";
    public const string AffSub1 = "{affsub1}";


    public static string getVal(string key)
    {

        switch (key)
        {
            case camp:
                return "campiagn_it_banner_size_360";
            case publisher_id:
                return "78983";
            case Transaction:
                return "c1032072-f815-413b-a57c-4a027f681e60";
            case AffSub1:
                return "78bea32a-6ead-4ea0-b9f2-9489ebc43d6a";
            case AffSite:
                return "vbvsdgdavhdgdvjs_46_789-p90";
            case creative_id:
                return "360x360";
            case ua:
                return "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/90.0.4430.93 Safari/537.36";
            case ip:
                return "192.168.1.1";
            default:
                return "";
        }
    }

    public static ReadOnlySpan<char> getVal(ReadOnlySpan<char> key)
    {

        if (MemoryExtensions.Equals(key, camp, StringComparison.Ordinal))
            return "campiagn_it_banner_size_360".AsSpan();
        else if (MemoryExtensions.Equals(key, publisher_id, StringComparison.Ordinal))
            return "78983".AsSpan();
        else if (MemoryExtensions.Equals(key, Transaction, StringComparison.Ordinal))
            return "c1032072-f815-413b-a57c-4a027f681e6".AsSpan();
        else if (MemoryExtensions.Equals(key, AffSub1, StringComparison.Ordinal))
            return "78bea32a-6ead-4ea0-b9f2-9489ebc43d6a".AsSpan();
        else if (MemoryExtensions.Equals(key, AffSite, StringComparison.Ordinal))
            return "vbvsdgdavhdgdvjs_46_789-p90".AsSpan();
        else if (MemoryExtensions.Equals(key, creative_id, StringComparison.Ordinal))
            return "360x360".AsSpan();
        else if (MemoryExtensions.Equals(key, ua, StringComparison.Ordinal))
            return "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/90.0.4430.93 Safari/537.36".AsSpan();
        else if (MemoryExtensions.Equals(key, ip, StringComparison.Ordinal))
            return "192.168.1.1".AsSpan();
        else
            return "".AsSpan();


    }




}
   [Benchmark]
    public string StandardTokenReplace()
    {

        string request = "http://wwww.example.com?a=campiagn_it_banner_size_360&b=78983&h=c1032072-f815-413b-a57c-4a027f681e6&y=78bea32a-6ead-4ea0-b9f2-9489ebc43d6a&ty=vbvsdgdavhdgdvjs_46_789-p90&yhhh=360x360&sua=Mozilla%2F5.0%20(Windows%20NT%2010.0%3B%20Win64%3B%20x64)%20AppleWebKit%2F537.36%20(KHTML%2C%20like%20Gecko)%20Chrome%2F90.0.4430.93%20Safari%2F537.36&ppp=192.168.1.1";
        string redirecturl = "https://www.example.com/xxxxx?campaign={camp}&adgroup={publisher_id}&install_callback=https%3A%2F%2Fpostback.example.com%3Ftransaction%3D{transaction}&session_callback=https%3A%2F%2Fpostback.example.com%3Ftransaction%3D{aff_sub1}&affsite={aff_site}&clickid={transaction}&adset_id={creative_id}&user_agent={ua}&ip={ip}&language={lang}&ieruiero{343454";
        int max_allocation = Math.Max(request.Length, redirecturl.Length) + Math.Abs(request.Length - redirecturl.Length);

        //get original url and take the longest one + domain


        return redirecturl.Replace(Placeholders.camp, Placeholders.getVal(Placeholders.camp))
            .Replace(Placeholders.publisher_id, Placeholders.getVal(Placeholders.publisher_id))
            .Replace(Placeholders.creative_id, Placeholders.getVal(Placeholders.creative_id))
            .Replace(Placeholders.ua, Placeholders.getVal(Placeholders.ua))
            .Replace(Placeholders.lang, Placeholders.getVal(Placeholders.lang))
            .Replace(Placeholders.ip, Placeholders.getVal(Placeholders.ip))
            .Replace(Placeholders.Transaction, Placeholders.getVal(Placeholders.Transaction))
            .Replace(Placeholders.AffSite, Placeholders.getVal(Placeholders.AffSite))
            .Replace(Placeholders.AdsetId, Placeholders.getVal(Placeholders.AdsetId))
            .Replace(Placeholders.AffSub1, Placeholders.getVal(Placeholders.AffSub1));

    }
Run Code Online (Sandbox Code Playgroud)

1 最大分配

    int max_allocation = Math.Max(request.Length, redirecturl.Length) * 3;
Run Code Online (Sandbox Code Playgroud)

我们可以计算出字符串的正确大小,但它的性能会更差。对于这种情况,我们可以假设一个最大长度。

www.example.com?camp=1234567890123456789023456789012345678902345678 www.replace.com?{camp}{camp}{camp}{camp}{camp}{camp}{camp}

行不通的。

2 获得价值

   ReadOnlySpan<char> with = Placeholders.getVal(replace.ToString()).AsSpan();
Run Code Online (Sandbox Code Playgroud)

如果占位符重复,我们可以缓存该值或在移动到下一个占位符之前搜索所有出现的情况。

公共静态字符串 getVal(string key) 与公共静态字符串 getVal(ReadOnlySpan key)

使用字符串版本我们仍然可以获得更好的性能。有什么建议可以改进吗?

// * 概括 *

BenchmarkDotNet=v0.12.1,操作系统=Windows 10.0.19041.928 (2004/?/20H1) Intel Core i9-10900 CPU 2.80GHz,1 个 CPU,20 个逻辑核心和 10 个物理核心 .NET Core SDK=5.0.202 [主机] : . NET Core 3.1.14(CoreCLR 4.700.21.16201、CoreFX 4.700.21.16208)、X64 RyuJIT .NET Core 3.1:.NET Core 3.1.14(CoreCLR 4.700.21.16201、CoreFX 4.700.21.16208)、X64 RyuJIT

作业=.NET Core 3.1 工具链=.NET Core 3.1

方法 网址 寻找 代替 意思是 错误 标准差 第0代 第一代 第2代 已分配
快速令牌替换 [196] 518.8纳秒 4.63纳秒 3.61纳秒 1 0.2470 0.0038 - 2.52 KB
FastTokenReplaceImproveMem [196] 584.4纳秒 6.84纳秒 5.71纳秒 2 0.2050 0.0010 - 2.09 KB
标准令牌替换 [196] 4,242.7 纳秒 84.82纳秒 94.27纳秒 3 0.6866 - - 7.06 KB

Net*_*age 2

使用示例数据运行一些测试,这似乎String.Replace是高度优化的,并且返回首先找到匹配项StringBuilder.Replace的 my 的变体IndexOfAny(基于 CodeReview 的改进)速度较慢。使用元组数组进行替换是我的测试中最快的:

var s = "https://www.example.com/xxxxx?campaign={camp}&adgroup={publisher_id}&install_callback=https%3A%2F%2Fpostback.example.com%3Ftransaction%3D{transaction}&session_callback=https%3A%2F%2Fpostback.example.com%3Ftransaction%3D{aff_sub1}&affsite={aff_site}&clickid={transaction}&adset_id={creative_id}&user_agent={ua}&ip={ip}&language={lang}";

var replacementsa = new[] {
        ("{camp}", "campiagn_it_banner_size_360"),
        ("{publisher_id}", "78983"),
        ("{transaction}", "c1032072-f815-413b-a57c-4a027f681e60"),
        ("{aff_sub1}", "78bea32a-6ead-4ea0-b9f2-9489ebc43d6a"),
        ("{aff_site}", "vbvsdgdavhdgdvjs_46_789-p90"),
        ("{creative_id}", "360x360"),
        ("{ua}", "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/90.0.4430.93 Safari/537.36"),
        ("{ip}", "192.168.1.1"),
        ("{lang}", "en")
    };

public static string MultiReplace(this string s, (string match,string replace)[] replacements) {
    for (int replacementNum = 0; replacementNum < replacements.Length; ++replacementNum)
        s = s.Replace(replacements[replacementNum].match, replacements[replacementNum].replace);

    return s;
}
Run Code Online (Sandbox Code Playgroud)

  • @Riccardo 1)您对“var”类型的理解不正确 - 请参阅[有关 var 的 C# 文档](https://learn.microsoft.com/en-us/dotnet/csharp/language-reference/keywords/var) 2) 由于我们不是按键查找,而是处理所有值,与“ValueTuple”相比,“Dictionary”只是增加了开销和分配。如果需要动态更改,请使用“List&lt;T&gt;”。3) 每次替换仅一次,但它比 `StringBuilder` 或 `Span` 的替代方案更快,因为它是优化的库代码,所以内存分配和速度哪个更重要? (2认同)