【发布时间】:2020-05-27 01:31:32
【问题描述】:
在创建一个应该尽可能快的小型跟踪器类时,我查看了生成的汇编代码。这个想法是创建一个包装类,它在进入方法时记录方法进入,在离开方法时记录方法离开。这可以通过 using 语句来实现:
// Example usage: records entry into and exit from DoStruct via a using declaration.
public void DoStruct()
{
// Wrapper.Create runs on entry; the using declaration disposes tmp when the method scope ends.
using var tmp = Wrapper.Create(nameof(DoStruct));
}
这将创建一个跟踪器结构体实例,该实例在方法离开时会被自动释放(Dispose)。到目前为止,一切都很好。现在让我们看看生成的汇编代码:
NetCoreJitStruct.User.DoStruct()
Begin 00007FFC91651860, size 61
00007ffc`91651860 push rbp
00007ffc`91651861 sub rsp,30h
00007ffc`91651865 lea rbp,[rsp+30h]
00007ffc`9165186a xor eax,eax
00007ffc`9165186c mov qword ptr [rbp-8],rax
00007ffc`91651870 mov qword ptr [rbp-10h],rsp
00007ffc`91651874 mov qword ptr [rbp+10h],rcx
00007ffc`91651878 mov rcx,268F07F30D8h
00007ffc`91651882 mov rcx,qword ptr [rcx]
00007ffc`91651885 call 00007ffc`91650800 (NetCoreJitStruct.Wrapper.Create(System.String) *** Ctor called
00007ffc`9165188a mov qword ptr [rbp-8],rax
00007ffc`9165188e jmp 00007ffc`91651890
00007ffc`91651890 mov rcx,rsp
00007ffc`91651893 call 00007ffc`9165189f (NetCoreJitStruct.User.DoClass() *** Dispose Called via extra method Call!
00007ffc`91651898 nop
00007ffc`91651899 lea rsp,[rbp]
00007ffc`9165189d pop rbp
00007ffc`9165189e ret
00007ffc`9165189f push rbp ** Dispose wrapper method
00007ffc`916518a0 sub rsp,30h
00007ffc`916518a4 mov rbp,qword ptr [rcx+20h]
00007ffc`916518a8 mov qword ptr [rsp+20h],rbp
00007ffc`916518ad lea rbp,[rbp+30h]
00007ffc`916518b1 lea rcx,[rbp-8]
00007ffc`916518b5 call 00007ffc`91650818 (NetCoreJitStruct.Wrapper.Dispose()
00007ffc`916518ba nop
00007ffc`916518bb add rsp,30h
00007ffc`916518bf pop rbp
00007ffc`916518c0 ret
我不明白为什么 JIT 编译器将 Dispose 方法调用拆分到一个额外的包装方法中。.NET 4.8 的行为相同,只是生成的代码效率更低一些。我已经检查过这是否是结构体方法内联的问题,但是对于可释放(IDisposable)的类,行为也是相同的。
这是我使用 .NET 可以获得的最快速度,还是我错过了一些模式以使其更快/更适合 JIT?
编译器:C# 7+ .NET:.NET 4.8 或 .NET Core 3.1
以下是完整的源代码
using System;
using System.Collections.Generic;
using System.Diagnostics;
using System.Runtime.CompilerServices;
namespace NetCoreJitStruct
{
class Program
{
/// <summary>
/// Benchmark driver: parses the command-line switches, calls the traced methods once
/// as a warm-up, then times <c>Runs</c> calls of the selected scenario and prints the result.
/// </summary>
/// <param name="args">
/// Supported switches (matched case-insensitively):
/// <c>-trace</c> enables trace output;
/// <c>-struct</c> (default) or <c>-class</c> selects the wrapper kind;
/// <c>-direct</c> calls Dispose directly instead of going through a using declaration;
/// <c>-nofactory</c> constructs the struct wrapper with <c>new</c> instead of the factory;
/// <c>-nop</c> times the empty loop only.
/// <c>-direct</c> and <c>-nofactory</c> only affect the struct scenario.
/// Any other argument prints the usage line and exits.
/// </param>
[MethodImpl(MethodImplOptions.NoInlining)]
static void Main(string[] args)
{
    bool useStruct = true;
    bool nofactory = false;
    bool nop = false;
    bool direct = false;

    foreach (string rawArg in args)
    {
        // Invariant casing keeps switch matching independent of the current culture
        // (e.g. "-DIRECT" would not lower-case to "-direct" under a Turkish culture).
        string arg = rawArg.ToLowerInvariant();
        switch (arg)
        {
            case "-trace":
                CustomData.IsEnabled = true;
                break;
            case "-struct":
                // Struct is the default scenario; accepted for symmetry with -class.
                break;
            case "-class":
                useStruct = false;
                break;
            case "-direct":
                direct = true;
                break;
            case "-nop":
                nop = true;
                break;
            case "-nofactory":
                nofactory = true;
                break;
            default:
                Console.WriteLine("NetCoreJitStruct [-trace] [-struct or -class] [-direct] [-nofactory] [-nop]");
                return;
        }
    }

    // Warm-up: call every traced method once before the timed section.
    var user = new User();
    user.DoClass_Factory();
    user.DoStruct_Factory();
    user.DoStruct_NoFactory();
    user.DoStructTryFinally();
    user.DoStructNoFinally_Factory();
    user.DoStructNoFinally_NoFactory();

    const int Runs = 1500_000_000;
    var sw = Stopwatch.StartNew();

    // Each scenario keeps its own explicit loop so the measured call is a plain direct call.
    if (nop)
    {
        // Measure loop overhead only.
        for (int i = 0; i < Runs; i++)
        {
        }
    }
    else if (!useStruct)
    {
        for (int i = 0; i < Runs; i++)
        {
            user.DoClass_Factory();
        }
    }
    else if (direct && nofactory)
    {
        for (int i = 0; i < Runs; i++)
        {
            user.DoStructNoFinally_NoFactory();
        }
    }
    else if (direct)
    {
        for (int i = 0; i < Runs; i++)
        {
            user.DoStructNoFinally_Factory();
        }
    }
    else if (nofactory)
    {
        for (int i = 0; i < Runs; i++)
        {
            user.DoStruct_NoFactory();
        }
    }
    else
    {
        for (int i = 0; i < Runs; i++)
        {
            user.DoStruct_Factory();
        }
    }

    sw.Stop();
    string scenario = useStruct ? "Struct" : "Class";
    Console.WriteLine($"Scenario: {scenario} NoFactory: {nofactory} Nop: {nop} Direct: {direct} Did execute {Runs:N0} Trace calls in {sw.Elapsed.TotalMilliseconds:F0} ms");
}
}
/// <summary>
/// Benchmark targets. Each method creates a tracing wrapper and disposes it, using a
/// different combination of wrapper kind (class/struct), construction (factory/<c>new</c>)
/// and disposal (using declaration, explicit try/finally, or direct call).
/// All methods are marked NoInlining so each one is measured as a real call.
/// </summary>
class User
{
    /// <summary>Class wrapper from the factory, disposed by a using declaration.</summary>
    [MethodImpl(MethodImplOptions.NoInlining)]
    public void DoClass_Factory()
    {
        using var tmp = CWrapper.Create(nameof(DoClass_Factory));
    }

    /// <summary>Struct wrapper from the factory, disposed by a using declaration.</summary>
    [MethodImpl(MethodImplOptions.NoInlining)]
    public void DoStruct_Factory()
    {
        using var tmp = Wrapper.Create(nameof(DoStruct_Factory));
    }

    /// <summary>Struct wrapper from the factory, disposed in an explicit finally block.</summary>
    [MethodImpl(MethodImplOptions.NoInlining)]
    public void DoStructTryFinally()
    {
        // Pass this method's own name so trace output identifies the right method.
        var tmp = Wrapper.Create(nameof(DoStructTryFinally));
        try
        {
            // Intentionally empty: only the wrapper overhead is of interest.
        }
        finally
        {
            tmp.Dispose();
        }
    }

    /// <summary>Struct wrapper from the factory, disposed by a direct call without try/finally.</summary>
    [MethodImpl(MethodImplOptions.NoInlining)]
    public void DoStructNoFinally_Factory()
    {
        var tmp = Wrapper.Create(nameof(DoStructNoFinally_Factory));
        tmp.Dispose();
    }

    /// <summary>Struct wrapper built with <c>new</c>, disposed by a direct call without try/finally.</summary>
    [MethodImpl(MethodImplOptions.NoInlining)]
    public void DoStructNoFinally_NoFactory()
    {
        var tmp = new Wrapper(nameof(DoStructNoFinally_NoFactory));
        tmp.Dispose();
    }

    /// <summary>Struct wrapper built with <c>new</c>, disposed by a using declaration.</summary>
    [MethodImpl(MethodImplOptions.NoInlining)]
    internal void DoStruct_NoFactory()
    {
        using var tmp = new Wrapper(nameof(DoStruct_NoFactory));
    }
}
/// <summary>
/// Struct-based tracing wrapper. Holds a <c>CustomData</c> instance when tracing is
/// enabled at construction time and null otherwise; disposing forwards to that instance.
/// Declared readonly because the state is never modified after construction.
/// </summary>
public readonly struct Wrapper : IDisposable
{
    private readonly CustomData data_;

    /// <summary>
    /// Factory: returns a constructed wrapper when <c>CustomData.IsEnabled</c> is true,
    /// otherwise the default (empty) wrapper without calling the constructor.
    /// </summary>
    /// <param name="input">Text passed on to the wrapper constructor.</param>
    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    public static Wrapper Create(string input)
    {
        return CustomData.IsEnabled ? new Wrapper(input) : default;
    }

    /// <summary>
    /// Creates the wrapper; allocates a <c>CustomData</c> only when tracing is enabled.
    /// </summary>
    /// <param name="a">Text passed on to <c>CustomData</c>.</param>
    public Wrapper(string a)
    {
        data_ = CustomData.IsEnabled ? new CustomData(a) : null;
    }

    /// <summary>Disposes the held <c>CustomData</c>, if there is one.</summary>
    public void Dispose()
    {
        data_?.Dispose();
    }
}
/// <summary>
/// Class-based counterpart of the struct wrapper: holds a <c>CustomData</c> instance
/// when tracing is enabled at construction time and forwards Dispose to it.
/// </summary>
public class CWrapper : IDisposable
{
    CustomData _data;

    /// <summary>
    /// Factory: returns null when <c>CustomData.IsEnabled</c> is false,
    /// otherwise a newly constructed wrapper.
    /// </summary>
    /// <param name="input">Text passed on to the wrapper constructor.</param>
    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    public static CWrapper Create(string input)
    {
        if (!CustomData.IsEnabled)
        {
            return null;
        }

        return new CWrapper(input);
    }

    /// <summary>
    /// Creates the wrapper; allocates a <c>CustomData</c> only when tracing is enabled.
    /// </summary>
    /// <param name="a">Text passed on to <c>CustomData</c>.</param>
    public CWrapper(string a)
    {
        // The field keeps its default null value when tracing is disabled.
        if (CustomData.IsEnabled)
        {
            _data = new CustomData(a);
        }
    }

    /// <summary>Disposes the held <c>CustomData</c>, if there is one.</summary>
    public void Dispose() => _data?.Dispose();
}
/// <summary>
/// Trace payload: writes an "Entered method" line to the console when constructed
/// and a "Left method" line when disposed.
/// </summary>
class CustomData : IDisposable
{
    /// <summary>Global switch read by the wrappers to decide whether to trace.</summary>
    public static bool IsEnabled;

    string _methodName;

    /// <summary>Writes the entry line and remembers the text for the exit line.</summary>
    /// <param name="data">Text printed in the entry and exit lines.</param>
    public CustomData(string data)
    {
        Console.WriteLine("Entered method {0}", data);
        _methodName = data;
    }

    /// <summary>Writes the exit line for the text given at construction.</summary>
    public void Dispose() => Console.WriteLine("Left method {0}", _methodName);
}
}
一些测试结果:
JitStruct.exe -nop
Scenario: Struct NoFactory: False Nop: True Direct: False Did execute 1,500,000,000 Trace calls in 404 ms
JitStruct.exe -struct
Scenario: Struct NoFactory: False Nop: False Direct: False Did execute 1,500,000,000 Trace calls in 4837 ms
JitStruct.exe -struct
Scenario: Struct NoFactory: False Nop: False Direct: False Did execute 1,500,000,000 Trace calls in 4832 ms
JitStruct.exe -struct -direct
Scenario: Struct NoFactory: False Nop: False Direct: True Did execute 1,500,000,000 Trace calls in 4146 ms
JitStruct.exe -struct -direct
Scenario: Struct NoFactory: False Nop: False Direct: True Did execute 1,500,000,000 Trace calls in 4156 ms
JitStruct.exe -struct -direct -nofactory
Scenario: Struct NoFactory: True Nop: False Direct: True Did execute 1,500,000,000 Trace calls in 6424 ms
JitStruct.exe -struct -direct -nofactory
Scenario: Struct NoFactory: True Nop: False Direct: True Did execute 1,500,000,000 Trace calls in 6389 ms
JitStruct.exe -struct -direct -nofactory
Scenario: Struct NoFactory: True Nop: False Direct: True Did execute 1,500,000,000 Trace calls in 6417 ms
JitStruct.exe -struct -nofactory
Scenario: Struct NoFactory: True Nop: False Direct: False Did execute 1,500,000,000 Trace calls in 6063 ms
【问题讨论】:
-
您确定 Dispose 包装器方法会导致性能问题吗?
-
这不是包装方法,而是funclet。有 finally cloning 这样的东西应该优化简单的场景,但我不会假装理解它什么时候起作用或不起作用。
-
@Robert Harvey:通过计算多余的指令:是的。您可以调用 DoStructNoFinally 来试一试,它会生成我所期望的代码。
-
您是否对此代码运行了分析器并确定这是一个实际的性能问题,与您的一个非功能性软件要求相冲突?
-
由于 funclet 是正确展开异常堆栈所必需的,因此只要需要 finally,您现在看到的代码可能就是 JIT 所能生成的最好的代码。除非某位从事 JIT 工作的人能出面说明 finally 克隆是否有帮助(如果有,该如何触发它);不过,您通过代码仓库(提交 issue)或在 Gitter 上联系他们,得到答复的机会更大。
标签: c# .net performance jit