基础介绍

仓库地址

https://github.com/dotnet/runtime/

我本地的项目位置

C:\project\SourceCode\runtime-5.0.0-preview.3.20214.6\src\libraries\System.Collections

实现原理和Dictionary差不多,都是链地址法解决冲突。

Dictionary 有Key Value

HashSet只有Value

实际容器为Slot[] m_slots;

/// <summary>
/// One storage cell of the set: a stored value together with its hash code
/// and the index of the next cell in the same chain.
/// </summary>
internal struct Slot 
{
  internal int hashCode;      // Lower 31 bits of hash code, -1 if unused
  internal int next;          // Index of next entry, -1 if last
  internal T value;           // The stored element
}

HashSet操作元素的时间复杂度接近O(1)

定义 int[] m_buckets 数组来保存元素在实际容器 Slot[] m_slots 中的位置

即 Value 保存在以 m_slots[m_buckets[hashCode % m_buckets.Length] - 1] 为起点的链上(hashCode 为哈希码的低31位;m_buckets 中保存的是“索引+1”,0 表示空桶;发生冲突时沿 next 依次查找)

容器长度为质数

质数只能被1和自身整除

减少位置冲突

数据已满时再添加数据会自动扩容,新容量为不小于当前容量2倍的质数(见下文 HashHelpers.ExpandPrime)

新建一个约2倍大小(长度取质数)的容器

数据拷贝过去 重新计算位置

使用优化点

已知容器大小的情况 直接初始化对应大小

为自定义元素实现 IEqualityComparer<T>,可以更高效地判断相等和获取 HashCode

 

哈希函数

当位置冲突时,使用 Slot.next 保存同一个桶中下一个元素的索引,也就是用拉链法解决冲突。

// A null value always hashes to 0; otherwise the comparer's hash code for the value is passed through InternalGetHashCode.
// NOTE(review): this call passes a single int, unlike the (T, comparer) overload quoted below; presumably a separate int overload exists. Confirm.
hashCode = value == null ? 0 : InternalGetHashCode(comparer.GetHashCode(value));

这里的 comparer 来自 IEqualityComparer<T>? comparer = _comparer; 它可以是默认比较器,也可以通过构造函数传入

InternalGetHashCode方法如下

/// <summary>
/// Computes the hash code stored for <paramref name="item"/>.
/// </summary>
/// <param name="item">The element to hash; a null element hashes to 0.</param>
/// <param name="comparer">Comparer used to hash the element; when null, the element's own GetHashCode is used.</param>
/// <returns>The hash code masked with Lower31BitMask, or 0 for a null element.</returns>
private static int InternalGetHashCode(T item, IEqualityComparer<T>? comparer)
{
    if (item != null)
    {
        // Use the supplied comparer when there is one, otherwise fall back to the element itself.
        int rawHash = comparer != null ? comparer.GetHashCode(item) : item.GetHashCode();
        return rawHash & Lower31BitMask;
    }

    return 0;
}

最后通过hashCode对桶长度求余获取bucket

// Reduce the hash code to a bucket index by taking the remainder modulo the bucket array length.
bucket = hashCode % _buckets!.Length;

 

内部AddIfNotPresent方法

/// <summary>
/// Inserts <paramref name="value"/> into the set unless an equal element is already stored.
/// </summary>
/// <param name="value">The element to insert.</param>
/// <returns>true when the element was inserted; false when an equal element was already present.</returns>
/// <exception cref="InvalidOperationException">
/// The bucket chain is longer than the slot array, which means the chain forms a loop
/// (a concurrent update has happened).
/// </exception>
private bool AddIfNotPresent(T value)
{
    if (_buckets == null)
    {
        Initialize(0);
    }

    int hash = InternalGetHashCode(value);
    int bucketIndex = hash % _buckets.Length;

    // Work on a local reference to the slot array while walking the chain.
    Slot[] entries = _slots;

    // Buckets store "slot index + 1", so subtracting 1 yields -1 for an empty bucket
    // and the walk below is skipped entirely.
    int visited = 0;
    int cursor = _buckets[bucketIndex] - 1;
    while (cursor >= 0)
    {
        // An equal element is already stored: nothing to add.
        if (entries[cursor].hashCode == hash && _comparer.Equals(entries[cursor].value, value))
        {
            return false;
        }

        // More links followed than there are slots.
        if (visited >= entries.Length)
        {
            // The chain of entries forms a loop, which means a concurrent update has happened.
            throw new InvalidOperationException();
        }

        visited++;
        cursor = entries[cursor].next;
    }

    // Pick the slot that will receive the new element.
    int target;
    if (_freeList < 0)
    {
        // No recycled slot available: append at the end, growing first when the array is full.
        if (_lastIndex == entries.Length)
        {
            IncreaseCapacity();
            // Both arrays change during a resize, so refresh the local reference and the bucket index.
            entries = _slots;
            bucketIndex = hash % _buckets.Length;
        }

        target = _lastIndex++;
    }
    else
    {
        // Reuse the head of the free list and advance the list to its successor.
        target = _freeList;
        _freeList = entries[target].next;
    }

    // Store the element and link it in as the new head of its bucket chain.
    entries[target].hashCode = hash;
    entries[target].value = value;
    entries[target].next = _buckets[bucketIndex] - 1;
    _buckets[bucketIndex] = target + 1;
    _count++;
    _version++;

    return true;
}

 

HashHelpers辅助类

这里HashHelpers是用来求素数和获取下一次扩容的大小的辅助类,里面有一个数组存放基础素数,如果容量超过已有素数,会通过数学的方法计算出需要的素数。

/// <summary>
/// Helpers for sizing hash-based collections: a table of precomputed primes plus routines
/// to test primality, find a prime of at least a given size, and choose the next capacity when growing.
/// </summary>
public class HashHelpers
{
    public const uint HashCollisionThreshold = 100;

    // This is the maximum prime smaller than Array.MaxArrayLength
    public const int MaxPrimeArrayLength = 0x7FEFFFFD;

    // GetPrime rejects computed candidates p for which (p - 1) is a multiple of this value.
    public const int HashPrime = 101;

    // Precomputed primes in ascending order; GetPrime scans this table before computing primes itself.
    private static readonly int[] s_primes =
    {
        3, 7, 11, 17, 23, 29, 37, 47, 59, 71, 89, 107, 131, 163, 197, 239, 293, 353, 431, 521, 631, 761, 919,
        1103, 1327, 1597, 1931, 2333, 2801, 3371, 4049, 4861, 5839, 7013, 8419, 10103, 12143, 14591,
        17519, 21023, 25229, 30293, 36353, 43627, 52361, 62851, 75431, 90523, 108631, 130363, 156437,
        187751, 225307, 270371, 324449, 389357, 467237, 560689, 672827, 807403, 968897, 1162687, 1395263,
        1674319, 2009191, 2411033, 2893249, 3471899, 4166287, 4999559, 5999471, 7199369
    };

    /// <summary>
    /// Determines whether <paramref name="candidate"/> is a prime number.
    /// </summary>
    /// <param name="candidate">The number to test.</param>
    /// <returns>true if the number is prime; otherwise false (including for 0, 1 and negative numbers).</returns>
    public static bool IsPrime(int candidate)
    {
        // Primes start at 2. Without this guard, 1 and negative odd numbers would
        // skip the divisor loop below and be reported as prime.
        if (candidate < 2)
        {
            return false;
        }

        // The lowest bit is 0 for even numbers, and 2 is the only even prime.
        if ((candidate & 1) == 0)
        {
            return candidate == 2;
        }

        // A composite number has a divisor no larger than its square root,
        // so trial division can stop there.
        int limit = (int)Math.Sqrt(candidate);

        // Start at 3 and step by 2 to test odd divisors only.
        for (int divisor = 3; divisor <= limit; divisor += 2)
        {
            if ((candidate % divisor) == 0)
            {
                return false;
            }
        }

        return true;
    }

    /// <summary>
    /// Returns a prime that is greater than or equal to <paramref name="min"/>.
    /// </summary>
    /// <param name="min">The minimum acceptable value.</param>
    /// <returns>
    /// The first table prime that is at least <paramref name="min"/>; beyond the table, the first odd prime p
    /// at or above <paramref name="min"/> for which (p - 1) is not a multiple of <see cref="HashPrime"/>.
    /// </returns>
    /// <exception cref="ArgumentException"><paramref name="min"/> is negative.</exception>
    public static int GetPrime(int min)
    {
        if (min < 0)
        {
            throw new ArgumentException("The requested minimum size must not be negative.", nameof(min));
        }

        // Fast path: look the value up in the precomputed table.
        foreach (int prime in s_primes)
        {
            if (prime >= min)
            {
                return prime;
            }
        }

        // Outside of our predefined table. Compute the hard way.
        // OR-ing with 1 makes the starting point odd (unchanged if already odd, +1 if even);
        // stepping by 2 then skips every even number, none of which can be prime here.
        for (int i = (min | 1); i < int.MaxValue; i += 2)
        {
            if (IsPrime(i) && ((i - 1) % HashPrime != 0))
            {
                return i;
            }
        }

        return min;
    }

    /// <summary>
    /// Returns the size a hash table of <paramref name="oldSize"/> entries should grow to.
    /// </summary>
    /// <param name="oldSize">The current size.</param>
    /// <returns>
    /// A prime of at least twice <paramref name="oldSize"/>, or <see cref="MaxPrimeArrayLength"/>
    /// when doubling would exceed it and the old size is still below it.
    /// </returns>
    public static int ExpandPrime(int oldSize)
    {
        // The doubling is allowed to wrap around; the (uint) comparison below detects that case.
        // Marked unchecked so the behavior is the same when the project is compiled with overflow checking.
        int newSize = unchecked(2 * oldSize);

        // Allow the hashtables to grow to maximum possible size (~2G elements) before encountering capacity overflow.
        // Note that this check works even when the doubled size overflowed thanks to the (uint) cast
        if (unchecked((uint)newSize) > MaxPrimeArrayLength && MaxPrimeArrayLength > oldSize)
        {
            return MaxPrimeArrayLength;
        }

        return GetPrime(newSize);
    }
}

 

对外公有方法

/// <summary>
/// Merges another collection into this set by adding every element of
/// <paramref name="other"/> that is not already present.
/// </summary>
/// <param name="other">The collection whose elements are added.</param>
/// <exception cref="ArgumentNullException"><paramref name="other"/> is null.</exception>
public void UnionWith(IEnumerable<T> other)
{
    if (other is null)
    {
        throw new ArgumentNullException(nameof(other));
    }

    // Walk the source with an explicit enumerator; AddIfNotPresent ignores duplicates.
    using (IEnumerator<T> cursor = other.GetEnumerator())
    {
        while (cursor.MoveNext())
        {
            AddIfNotPresent(cursor.Current);
        }
    }
}

/// <summary>
/// Removes from this set every element that appears in <paramref name="other"/>.
/// </summary>
/// <param name="other">The collection of elements to remove.</param>
/// <exception cref="ArgumentNullException"><paramref name="other"/> is null.</exception>
public void ExceptWith(IEnumerable<T> other)
{
    if (other == null)
    {
        // nameof keeps the parameter name refactor-safe and matches UnionWith.
        throw new ArgumentNullException(nameof(other));
    }

    // this is already the empty set; return
    // NOTE(review): AddIfNotPresent in this file maintains `_count`; confirm `m_count` is the same counter.
    if (m_count == 0)
    {
        return;
    }

    // special case if other is this; a set minus itself is the empty set
    if (other == this)
    {
        Clear();
        return;
    }

    // remove every element in other from this
    foreach (T element in other)
    {
        Remove(element);
    }
}

/// <summary>
/// Modifies this set so that it holds the symmetric difference of itself and
/// <paramref name="other"/>: elements found in one of the two but not in both.
/// </summary>
/// <param name="other">The collection to compare with this set.</param>
/// <exception cref="ArgumentNullException"><paramref name="other"/> is null.</exception>
public void SymmetricExceptWith(IEnumerable<T> other)
{
    if (other == null)
    {
        // nameof keeps the parameter name refactor-safe and matches UnionWith.
        throw new ArgumentNullException(nameof(other));
    }

    // if set is empty, then symmetric difference is other
    // NOTE(review): AddIfNotPresent in this file maintains `_count`; confirm `m_count` is the same counter.
    if (m_count == 0)
    {
        UnionWith(other);
        return;
    }

    // special case this; the symmetric difference of a set with itself is the empty set
    if (other == this)
    {
        Clear();
        return;
    }

    // If other is a HashSet, it has unique elements according to its equality comparer,
    // but if they're using different equality comparers, then assumption of uniqueness
    // will fail. So first check if other is a hashset using the same equality comparer;
    // symmetric except is a lot faster and avoids bit array allocations if we can assume
    // uniqueness
    // The type pattern replaces the `as` cast plus null check, so no possibly-null
    // value is ever assigned to a non-nullable local.
    if (other is MyHashSet<T> otherAsSet && AreEqualityComparersEqual(this, otherAsSet))
    {
        SymmetricExceptWithUniqueHashSet(otherAsSet);
    }
    else
    {
        SymmetricExceptWithEnumerable(other);
    }
}
对外公有方法

相关文章:

  • 2021-06-10
  • 2020-03-12
  • 2021-09-08
  • 2021-11-15
  • 2021-06-13
猜你喜欢
  • 2021-10-31
  • 2021-06-12
  • 2021-03-31
  • 2021-10-30
  • 2021-06-20
相关资源
相似解决方案