基础介绍
仓库地址
https://github.com/dotnet/runtime/
我本地的项目位置
C:\project\SourceCode\runtime-5.0.0-preview.3.20214.6\src\libraries\System.Collections
实现原理和Dictionary差不多,都是链地址法解决冲突。
Dictionary 有Key Value
HashSet只有Value
实际容器为Slot[] m_slots;
/// <summary>
/// One storage cell of the set: an element together with its cached hash code
/// and the index of the next cell in the same chain.
/// </summary>
internal struct Slot
{
    // Lower 31 bits of hash code, -1 if unused
    internal int hashCode;

    // Index of next entry, -1 if last
    internal int next;

    // The stored element.
    internal T value;
}
HashSet操作元素的时间复杂度接近O(1)
定义 int[] m_buckets 数组来保存元素在实际容器 Slot[] m_slots 中的位置
即 Value 保存在以 m_slots[m_buckets[hashCode % m_buckets.Length] - 1] 为链头的冲突链上(hashCode 为 value 哈希码的低 31 位;m_buckets 中存的是下标加 1,0 表示空桶)
容器长度为质数
质数只能被1和自身整除
减少位置冲突
容器已满时再添加数据会自动扩容,新容量为不小于当前容量 2 倍的质数
新建一个2倍大小的容器
数据拷贝过去 重新计算位置
使用优化点
已知容器大小的情况 直接初始化对应大小
自定义元素类型可以实现 IEqualityComparer<T>,从而更高效地判断相等和获取 HashCode
哈希函数
当位置冲突时,用 Slot.next 记录同一个桶中下一个元素的下标,把冲突的元素串成一条链,也就是用拉链法解决冲突。
// A null value hashes to 0; otherwise the comparer's hash code is passed through InternalGetHashCode.
hashCode = value == null ? 0 : InternalGetHashCode(comparer.GetHashCode(value));
这里comparer就是IEqualityComparer<T>? comparer = _comparer;可以是默认的,也可以构造函数传入
InternalGetHashCode方法如下
/// <summary>
/// Computes the non-negative hash code used internally for <paramref name="item"/>.
/// </summary>
/// <param name="item">The element to hash; a null element hashes to 0.</param>
/// <param name="comparer">Comparer that supplies the hash code; when null, the element's own GetHashCode is used.</param>
/// <returns>The hash code masked with Lower31BitMask.</returns>
private static int InternalGetHashCode(T item, IEqualityComparer<T>? comparer)
{
    if (item == null)
    {
        return 0;
    }

    int rawHash;
    if (comparer != null)
    {
        rawHash = comparer.GetHashCode(item);
    }
    else
    {
        rawHash = item.GetHashCode();
    }

    // Mask the raw hash with Lower31BitMask before returning it.
    return rawHash & Lower31BitMask;
}
最后通过hashCode对桶长度求余获取bucket
// Map the hash code to a bucket index by taking it modulo the bucket count.
// The `!` only suppresses the compiler's nullable warning for _buckets.
bucket = hashCode % _buckets!.Length;
内部AddIfNotPresent方法
/// <summary>
/// Adds <paramref name="value"/> to the set if an equal value is not already contained.
/// </summary>
/// <param name="value">The value to add.</param>
/// <returns><c>true</c> if the value was added; <c>false</c> if an equal value was already present.</returns>
/// <exception cref="InvalidOperationException">
/// The chain walk visited more entries than there are slots, which means the chain forms a loop
/// (a concurrent update has happened).
/// </exception>
private bool AddIfNotPresent(T value)
{
    if (_buckets == null)
    {
        Initialize(0);
    }

    int hashCode = InternalGetHashCode(value);
    int bucket = hashCode % _buckets.Length;
    int collisionCount = 0;

    // Keep a local reference to the slot array for the lookup below.
    Slot[] slots = _slots;

    // Walk the bucket's chain. Bucket entries are stored as (slot index + 1), so subtracting 1
    // gives the first slot to inspect, or -1 (loop skipped) when the bucket entry is 0.
    for (int i = _buckets[bucket] - 1; i >= 0; i = slots[i].next)
    {
        // An equal element already exists.
        if (slots[i].hashCode == hashCode && _comparer.Equals(slots[i].value, value))
        {
            return false;
        }

        // More steps than there are slots.
        if (collisionCount >= slots.Length)
        {
            // The chain of entries forms a loop, which means a concurrent update has happened.
            throw new InvalidOperationException("Concurrent operations are not supported.");
        }

        collisionCount++;
    }

    // Pick the slot to write to: reuse the head of the free list when there is one,
    // otherwise take the next never-used index (growing the storage first if it is full).
    int index;
    if (_freeList >= 0)
    {
        index = _freeList;
        _freeList = slots[index].next;
    }
    else
    {
        if (_lastIndex == slots.Length)
        {
            IncreaseCapacity();

            // this will change during resize
            slots = _slots;
            bucket = hashCode % _buckets.Length;
        }

        index = _lastIndex;
        _lastIndex++;
    }

    // Store the element and link it in as the new head of the bucket's chain.
    slots[index].hashCode = hashCode;
    slots[index].value = value;
    slots[index].next = _buckets[bucket] - 1;
    _buckets[bucket] = index + 1;
    _count++;
    _version++;

    return true;
}
HashHelpers辅助类
这里HashHelpers是用来求素数和获取下一次扩容的大小的辅助类,里面有一个数组存放基础素数,如果容量超过已有素数,会通过数学的方法计算出需要的素数。
/// <summary>
/// Prime-number helpers used to choose hash table sizes.
/// </summary>
public class HashHelpers
{
    public const uint HashCollisionThreshold = 100;

    // This is the maximum prime smaller than Array.MaxArrayLength
    public const int MaxPrimeArrayLength = 0x7FEFFFFD;

    public const int HashPrime = 101;

    // Precomputed primes consulted by GetPrime before it falls back to searching.
    private static readonly int[] s_primes =
    {
        3, 7, 11, 17, 23, 29, 37, 47, 59, 71, 89, 107, 131, 163, 197, 239, 293, 353, 431, 521, 631, 761, 919,
        1103, 1327, 1597, 1931, 2333, 2801, 3371, 4049, 4861, 5839, 7013, 8419, 10103, 12143, 14591,
        17519, 21023, 25229, 30293, 36353, 43627, 52361, 62851, 75431, 90523, 108631, 130363, 156437,
        187751, 225307, 270371, 324449, 389357, 467237, 560689, 672827, 807403, 968897, 1162687, 1395263,
        1674319, 2009191, 2411033, 2893249, 3471899, 4166287, 4999559, 5999471, 7199369
    };

    /// <summary>
    /// Determines whether <paramref name="candidate"/> is a prime number.
    /// </summary>
    /// <param name="candidate">The number to test.</param>
    /// <returns><c>true</c> if the number is prime; otherwise <c>false</c>.</returns>
    public static bool IsPrime(int candidate)
    {
        // 0, 1 and negative numbers are not prime. Without this guard every odd value
        // below 3 would fall through the (empty) trial-division loop and be reported as prime.
        if (candidate < 2)
        {
            return false;
        }

        // Even numbers: only 2 is prime.
        if ((candidate & 1) == 0)
        {
            return candidate == 2;
        }

        // Trial-divide by odd numbers from 3 up to the square root; a composite number
        // always has a divisor no larger than its square root.
        int limit = (int)Math.Sqrt(candidate);
        for (int divisor = 3; divisor <= limit; divisor += 2)
        {
            if ((candidate % divisor) == 0)
            {
                return false;
            }
        }

        return true;
    }

    /// <summary>
    /// Returns a prime that is greater than or equal to <paramref name="min"/>.
    /// </summary>
    /// <param name="min">The minimum acceptable value.</param>
    /// <returns>
    /// The first table entry that is at least <paramref name="min"/>; beyond the table, the first odd
    /// prime p at or above <paramref name="min"/> for which (p - 1) is not a multiple of
    /// <see cref="HashPrime"/>; or <paramref name="min"/> itself if the search finds nothing.
    /// </returns>
    /// <exception cref="ArgumentException"><paramref name="min"/> is negative.</exception>
    public static int GetPrime(int min)
    {
        if (min < 0)
        {
            throw new ArgumentException("Value must be non-negative.", nameof(min));
        }

        // Look in the precomputed table first.
        foreach (int prime in s_primes)
        {
            if (prime >= min)
            {
                return prime;
            }
        }

        // Outside of our predefined table. Compute the hard way.
        // (min | 1) forces the starting point to be odd, and stepping by 2 skips even numbers.
        for (int i = (min | 1); i < int.MaxValue; i += 2)
        {
            if (IsPrime(i) && ((i - 1) % HashPrime != 0))
            {
                return i;
            }
        }

        return min;
    }

    /// <summary>
    /// Returns size of hashtable to grow to.
    /// </summary>
    /// <param name="oldSize">The current size.</param>
    /// <returns>
    /// <see cref="MaxPrimeArrayLength"/> when doubling would exceed it and the current size is still
    /// below it; otherwise <c>GetPrime(2 * oldSize)</c>.
    /// </returns>
    public static int ExpandPrime(int oldSize)
    {
        // The doubling is allowed to wrap around; the (uint) comparison below detects that case.
        int newSize = unchecked(2 * oldSize);

        // Allow the hashtables to grow to maximum possible size (~2G elements) before encountering capacity overflow.
        // Note that this check works even when _items.Length overflowed thanks to the (uint) cast
        if (unchecked((uint)newSize) > MaxPrimeArrayLength && MaxPrimeArrayLength > oldSize)
        {
            return MaxPrimeArrayLength;
        }

        return GetPrime(newSize);
    }
}
对外公有方法
/// <summary>
/// Adds every element of <paramref name="other"/> that is not already present to this set.
/// </summary>
/// <param name="other">The collection to merge into this set.</param>
/// <exception cref="ArgumentNullException"><paramref name="other"/> is null.</exception>
public void UnionWith(IEnumerable<T> other)
{
    if (other == null)
    {
        throw new ArgumentNullException(nameof(other));
    }

    foreach (T item in other)
    {
        AddIfNotPresent(item);
    }
}

/// <summary>
/// Removes from this set every element that appears in <paramref name="other"/>.
/// </summary>
/// <param name="other">The collection of elements to remove.</param>
/// <exception cref="ArgumentNullException"><paramref name="other"/> is null.</exception>
public void ExceptWith(IEnumerable<T> other)
{
    if (other == null)
    {
        throw new ArgumentNullException(nameof(other));
    }

    // this is already the empty set; return
    if (m_count == 0)
    {
        return;
    }

    // special case if other is this; a set minus itself is the empty set
    if (other == this)
    {
        Clear();
        return;
    }

    // remove every element in other from this
    foreach (T element in other)
    {
        Remove(element);
    }
}

/// <summary>
/// Modifies this set in place to hold the symmetric difference of itself and <paramref name="other"/>.
/// </summary>
/// <param name="other">The collection to compare against.</param>
/// <exception cref="ArgumentNullException"><paramref name="other"/> is null.</exception>
public void SymmetricExceptWith(IEnumerable<T> other)
{
    if (other == null)
    {
        throw new ArgumentNullException(nameof(other));
    }

    // if set is empty, then symmetric difference is other
    if (m_count == 0)
    {
        UnionWith(other);
        return;
    }

    // special case this; the symmetric difference of a set with itself is the empty set
    if (other == this)
    {
        Clear();
        return;
    }

    // If other is a HashSet, it has unique elements according to its equality comparer,
    // but if they're using different equality comparers, then assumption of uniqueness
    // will fail. So first check if other is a hashset using the same equality comparer;
    // symmetric except is a lot faster and avoids bit array allocations if we can assume
    // uniqueness
    if (other is MyHashSet<T> otherAsSet && AreEqualityComparersEqual(this, otherAsSet))
    {
        SymmetricExceptWithUniqueHashSet(otherAsSet);
    }
    else
    {
        SymmetricExceptWithEnumerable(other);
    }
}