当每个线程块运行超过 32 个线程时,为什么性能会提高?
我的显卡有 480 个 CUDA 核心(15 个 SM × 32 个 SP)。
我想问一下关于下面代码的简单问题:
/// <summary>
/// Synchronous entry point: starts <c>MainAsync</c> and blocks the calling
/// thread until the returned task has finished.
/// </summary>
/// <param name="args">Command-line arguments; not used by this method.</param>
static void Main(string[] args)
{
    // GetAwaiter().GetResult() blocks just like Task.Wait() would, but a failure
    // is rethrown as the original exception rather than an AggregateException.
    var awaiter = MainAsync().GetAwaiter();
    awaiter.GetResult();
}
/// <summary>
/// Prints a greeting, calls <c>Calc</c> three times with the same argument,
/// awaits the three returned tasks one after another, and finally prints the
/// result of the first task.
/// </summary>
/// <returns>A task that completes once all three results have been awaited and the first one printed.</returns>
static async Task MainAsync()
{
    const int Size = 18000;

    Console.WriteLine("Hello World!");

    // All three calls are issued before any of the returned tasks is awaited.
    Task<int> first = Calc(Size);
    Task<int> second = Calc(Size);
    Task<int> third = Calc(Size);

    // Await in the original order; only the first task's value is reported.
    int firstTotal = await first;
    await second;
    await third;

    Console.WriteLine(firstTotal);
}
/// <summary>
/// Writes "Calc started" to the console, then adds up the values 0 .. a-1
/// a total of <paramref name="a"/> times using two nested loops.
/// </summary>
/// <remarks>
/// The body contains no <c>await</c>, so despite the <c>async</c> modifier the
/// whole computation runs synchronously on the caller's thread and the task
/// handed back is already completed. The running sum is a plain <c>int</c>;
/// for large inputs it wraps around unless the code is compiled in a checked
/// context.
/// </remarks>
/// <param name="a">Iteration count for both the outer and the inner loop; values of zero or less yield 0.</param>
/// <returns>A task carrying the accumulated sum.</returns>
static async Task<int> Calc(int a)
{
    // await Task.Delay(1);   (disabled in the original snippet)
    Console.WriteLine("Calc started");

    int total = 0;
    int outer = 0;
    while (outer < a)
    {
        int inner = 0;
        while (inner < a)
        {
            total += inner;
            inner++;
        }

        outer++;
    }

    return total;
}
Run Code Online (Sandbox Code Playgroud)
此示例 …