【发布时间】:2021-11-23 09:17:45
【问题描述】:
为什么 HashSet<string>(StringComparer.InvariantCultureIgnoreCase)(即 Perf_HashSet_CaseInsensitive)的性能如此糟糕?相比之下,Perf_HashSet 这种变通方法的性能高出约 20 倍。
// perf Contains(): 1M iterations, 25 size (unsuccessful lookup)
Test Duration (ms)
Perf_HashSet 43 <-
Perf_Dictionary 49
Perf_HybridDictionary 63
Perf_ListDictionary 223
Perf_List 225
Perf_HashSet_CaseInsensitive 903 <-
代码:
// <TargetFramework>net5.0</TargetFramework>
/// <summary>
/// Manual micro-benchmarks that time repeated membership lookups of a key
/// that was never inserted, against several small collections of
/// lower-cased GUID strings. Each benchmark prints the elapsed
/// milliseconds of its timed loop to the console.
/// </summary>
[TestFixture]
public class ContainsPerfTests
{
    /// <summary>Number of lookups performed inside each timed loop.</summary>
    private const int LookupCount = 1_000_000;

    /// <summary>Number of entries inserted into each collection before timing.</summary>
    private const int EntryCount = 25;

    /// <summary>
    /// Times <c>List&lt;string&gt;.Contains</c>; the probe is lower-cased on every lookup.
    /// </summary>
    [Test, Explicit]
    public void Perf_List()
    {
        var entries = new List<string>();
        for (int remaining = EntryCount; remaining > 0; remaining--)
        {
            string key = Guid.NewGuid().ToString().ToLowerInvariant();
            entries.Add(key);
        }

        string probe = Guid.NewGuid().ToString();

        var timer = new Stopwatch();
        timer.Start();
        for (int pass = 0; pass < LookupCount; pass++)
        {
            // The lower-casing is deliberately part of the measured work.
            bool found = entries.Contains(probe.ToLowerInvariant());
        }
        timer.Stop();

        Console.WriteLine(timer.ElapsedMilliseconds);
    }

    /// <summary>
    /// Times <c>HashSet&lt;string&gt;.Contains</c> with the default comparer;
    /// the probe is lower-cased on every lookup.
    /// </summary>
    [Test, Explicit]
    public void Perf_HashSet()
    {
        var entries = new HashSet<string>();
        for (int remaining = EntryCount; remaining > 0; remaining--)
        {
            string key = Guid.NewGuid().ToString().ToLowerInvariant();
            entries.Add(key);
        }

        string probe = Guid.NewGuid().ToString();

        var timer = new Stopwatch();
        timer.Start();
        for (int pass = 0; pass < LookupCount; pass++)
        {
            // The lower-casing is deliberately part of the measured work.
            bool found = entries.Contains(probe.ToLowerInvariant());
        }
        timer.Stop();

        Console.WriteLine(timer.ElapsedMilliseconds);
    }

    /// <summary>
    /// Times <c>HashSet&lt;string&gt;.Contains</c> on a set built with
    /// <c>StringComparer.InvariantCultureIgnoreCase</c>; the probe is passed as-is.
    /// </summary>
    [Test, Explicit]
    public void Perf_HashSet_CaseInsensitive()
    {
        // This comparer is the configuration under investigation.
        var entries = new HashSet<string>(StringComparer.InvariantCultureIgnoreCase);
        for (int remaining = EntryCount; remaining > 0; remaining--)
        {
            string key = Guid.NewGuid().ToString().ToLowerInvariant();
            entries.Add(key);
        }

        string probe = Guid.NewGuid().ToString();

        var timer = new Stopwatch();
        timer.Start();
        for (int pass = 0; pass < LookupCount; pass++)
        {
            bool found = entries.Contains(probe);
        }
        timer.Stop();

        Console.WriteLine(timer.ElapsedMilliseconds);
    }

    /// <summary>
    /// Times <c>Dictionary&lt;string, bool&gt;.ContainsKey</c> with the default
    /// comparer; the probe is lower-cased on every lookup.
    /// </summary>
    [Test, Explicit]
    public void Perf_Dictionary()
    {
        var entries = new Dictionary<string, bool>();
        for (int remaining = EntryCount; remaining > 0; remaining--)
        {
            string key = Guid.NewGuid().ToString().ToLowerInvariant();
            entries.Add(key, false);
        }

        string probe = Guid.NewGuid().ToString();

        var timer = new Stopwatch();
        timer.Start();
        for (int pass = 0; pass < LookupCount; pass++)
        {
            // The lower-casing is deliberately part of the measured work.
            bool found = entries.ContainsKey(probe.ToLowerInvariant());
        }
        timer.Stop();

        Console.WriteLine(timer.ElapsedMilliseconds);
    }

    /// <summary>
    /// Times <c>HybridDictionary.Contains</c> on an instance created with
    /// <c>caseInsensitive: true</c>; the probe is passed as-is.
    /// </summary>
    [Test, Explicit]
    public void Perf_HybridDictionary()
    {
        var entries = new HybridDictionary(caseInsensitive: true);
        for (int remaining = EntryCount; remaining > 0; remaining--)
        {
            string key = Guid.NewGuid().ToString().ToLowerInvariant();
            entries.Add(key, null);
        }

        string probe = Guid.NewGuid().ToString();

        var timer = new Stopwatch();
        timer.Start();
        for (int pass = 0; pass < LookupCount; pass++)
        {
            bool found = entries.Contains(probe);
        }
        timer.Stop();

        Console.WriteLine(timer.ElapsedMilliseconds);
    }

    /// <summary>
    /// Times <c>ListDictionary.Contains</c> on a default-constructed instance;
    /// the probe is lower-cased on every lookup.
    /// </summary>
    [Test, Explicit]
    public void Perf_ListDictionary()
    {
        var entries = new ListDictionary();
        for (int remaining = EntryCount; remaining > 0; remaining--)
        {
            string key = Guid.NewGuid().ToString().ToLowerInvariant();
            entries.Add(key, null);
        }

        string probe = Guid.NewGuid().ToString();

        var timer = new Stopwatch();
        timer.Start();
        for (int pass = 0; pass < LookupCount; pass++)
        {
            // The lower-casing is deliberately part of the measured work.
            bool found = entries.Contains(probe.ToLowerInvariant());
        }
        timer.Stop();

        Console.WriteLine(timer.ElapsedMilliseconds);
    }
}
【问题讨论】:
-
为了公平测试,您不是应该使用 OrdinalIgnoreCase 吗?InvariantCultureIgnoreCase 会按照固定区域性(invariant culture)的规则执行区分区域性的比较。
-
它们的行为不同,因此它们的性能特征不相关。
-
此外,您还没有把 ToLower...() 调用产生的内存分配计算在内,这些分配稍后需要由 GC 回收,您得为此付出代价。
-
使用忽略大小写比较器并使用 toLower() 插入元素似乎是多余的
-
@Blindy 只有真正可以被回收的字符串才需要付出回收代价。如果在出问题的实际代码中,原始字符串会一直被根引用(保持存活),那么您增加的是内存占用,而不[只]是 GC 压力。
标签: c# performance hashset