【问题标题】:Get longest substring between two strings(获取两个字符串之间的最长公共子字符串)
【发布时间】:2014-04-15 11:04:09
【问题描述】:

我有两个字符串:

britanicaeng 和 britanicahin

我需要找出它们之间最长的公共子串,即 britanica

如何在 C# 中做到这一点?

【问题讨论】:

标签: c# string substring


【解决方案1】:

试试这个方法:

// Returns the longest contiguous run of characters that occurs in both s1 and s2.
//
// Classic dynamic programming: for every pair (i, j) with s1[i] == s2[j], the length
// of the common run ending at those positions is one more than the run ending at
// (i - 1, j - 1). Only the previous row of the table is ever read, so two rolling
// rows are kept instead of a full (s1.Length + 1) x (s2.Length + 1) matrix; memory
// use is O(s2.Length) while running time stays O(s1.Length * s2.Length).
//
// Parameters:
//   s1, s2 - the strings to compare (case-sensitive, char by char).
// Returns:
//   The longest common substring, taken from s1. When several candidates share the
//   maximum length, the one that ends earliest in s1 wins. Returns an empty string
//   when the inputs have no character in common.
// Throws:
//   System.ArgumentNullException when s1 or s2 is null.
public static string FindLongestCommonSubstring(string s1, string s2)
{
    if (s1 == null)
    {
        throw new System.ArgumentNullException(nameof(s1));
    }
    if (s2 == null)
    {
        throw new System.ArgumentNullException(nameof(s2));
    }

    // previous[j + 1] = length of the common run ending at s1[i - 1] / s2[j].
    // Slot 0 of both rows is never written and stays 0 (the "no predecessor" case).
    int[] previous = new int[s2.Length + 1];
    int[] current = new int[s2.Length + 1];

    int bestLength = 0; // length of the best run found so far
    int bestEnd = 0;    // exclusive end index of that run inside s1

    for (int i = 0; i < s1.Length; i++)
    {
        for (int j = 0; j < s2.Length; j++)
        {
            if (s1[i] == s2[j])
            {
                int length = previous[j] + 1;
                current[j + 1] = length;

                // Strict comparison keeps the first run found among equally long ones.
                if (length > bestLength)
                {
                    bestLength = length;
                    bestEnd = i + 1;
                }
            }
            else
            {
                // Rows are reused, so stale values must be cleared explicitly.
                current[j + 1] = 0;
            }
        }

        int[] swap = previous;
        previous = current;
        current = swap;
    }

    return s1.Substring(bestEnd - bestLength, bestLength);
}

使用示例:

// The two sample words share the prefix "britanica", so that is what gets printed.
string longestCommon = FindLongestCommonSubstring("britanicaeng", "britanicahin");
Console.WriteLine(longestCommon);

【讨论】:

    【解决方案2】:

    我在 https://iq.opengenus.org/longest-common-substring-using-rolling-hash/ 重构了来自 Ashutosh Singh 的 C++ 代码,以在 C# 中创建滚动哈希方法 - 这将在 O(N * log(N)^2) 时间和 O(N) 空间中找到子字符串

    using System;
    using System.Collections.Generic;
    // Finds the longest common substring of two strings by binary-searching the
    // answer length and comparing polynomial rolling hashes of every window of that
    // length.
    //
    // Every window is identified by a pair of hashes: one modulo a prime slightly
    // above 1e9 and one modulo 2^64 (obtained through deliberate long wraparound).
    // A match is accepted on hash equality alone; windows are not re-compared
    // character by character, so a false positive is theoretically possible.
    //
    // An instance caches the powers of the hash base and grows that cache on demand.
    // The cache is mutated without any synchronization.
    public class RollingHash
    {
        // Lazily grown table of powers of the hash base, shared by all hashed strings
        // created through the owning RollingHash instance.
        private class RollingHashPowers
        {
            // _mod = prime modulus of polynomial hashing
            // any prime number over a billion should suffice
            internal const int _mod = (int)1e9 + 123;
            // _hashBase = base (point of hashing)
            // this should be a prime number larger than the number of characters used
            // in my use case I am only interested in ASCII (256) characters
            // for strings in languages using non-latin characters, this should be much larger
            internal const long _hashBase = 257;
            // _pow1 = powers of base modulo mod
            internal readonly List<int> _pow1 = new List<int> { 1 };
            // _pow2 = powers of base modulo 2^64
            internal readonly List<long> _pow2 = new List<long> { 1L };

            // Makes sure base^0 .. base^length are available in both tables.
            // Calls with a length that is already covered are no-ops.
            internal void EnsureLength(int length)
            {
                // Indices 0..length are stored, i.e. length + 1 entries.
                int required = length + 1;
                if (_pow1.Capacity < required)
                {
                    _pow1.Capacity = required;
                    _pow2.Capacity = required;
                }
                for (int currentIndx = _pow1.Count - 1; currentIndx < length; ++currentIndx)
                {
                    _pow1.Add((int)(_pow1[currentIndx] * _hashBase % _mod));
                    // Wraparound is the intended "modulo 2^64"; unchecked keeps this
                    // working even when the project is compiled with overflow checks.
                    _pow2.Add(unchecked(_pow2[currentIndx] * _hashBase));
                }
            }
        }

        // Prefix hashes of one string, allowing the hash of any substring to be
        // computed in constant time.
        private class RollingHashedString
        {
            readonly RollingHashPowers _pows;
            readonly int[] _pref1; // Hash on prefix modulo mod
            readonly long[] _pref2; // Hash on prefix modulo 2^64

            // Builds the prefix hashes of s.
            // pows must already contain powers for every index of s (0 .. s.Length - 1).
            // With caseInsensitive set, the ASCII letters 'A'..'Z' are hashed as
            // 'a'..'z'; all other characters are hashed unchanged.
            internal RollingHashedString(RollingHashPowers pows, string s, bool caseInsensitive = false)
            {
                _pows = pows;
                _pref1 = new int[s.Length + 1];
                _pref2 = new long[s.Length + 1];

                const long capAVal = 'A';
                const long capZVal = 'Z';
                const long aADif = 'a' - 'A';

                // Fill arrays with polynomial hashes on prefix.
                // The string indexer is used directly; no unsafe pointer access is
                // needed, so the code compiles without the AllowUnsafeBlocks option.
                for (int i = 0; i < s.Length; ++i)
                {
                    long v = s[i];
                    if (caseInsensitive && capAVal <= v && v <= capZVal)
                    {
                        v += aADif;
                    }
                    // v < 2^16 and the power is below _mod, so this product fits in a long.
                    _pref1[i + 1] = (int)((_pref1[i] + v * _pows._pow1[i]) % RollingHashPowers._mod);
                    // Intentional wraparound: arithmetic modulo 2^64.
                    _pref2[i + 1] = unchecked(_pref2[i] + v * _pows._pow2[i]);
                }
            }

            // Polynomial hash of the substring [pos, pos+len).
            // If mxPow != 0, the raw hash is multiplied by base^(mxPow - (pos + len - 1)),
            // which lifts the last character of the window to the power base^mxPow.
            // That makes hashes of equal-length windows comparable regardless of the
            // position (or string) they were taken from.
            internal Tuple<int, long> Apply(int pos, int len, int mxPow = 0)
            {
                int hash1 = _pref1[pos + len] - _pref1[pos];
                long hash2 = unchecked(_pref2[pos + len] - _pref2[pos]);
                if (hash1 < 0)
                {
                    // Bring the difference of two residues back into [0, _mod).
                    hash1 += RollingHashPowers._mod;
                }
                if (mxPow != 0)
                {
                    int shift = mxPow - (pos + len - 1);
                    hash1 = (int)((long)hash1 * _pows._pow1[shift] % RollingHashPowers._mod);
                    hash2 = unchecked(hash2 * _pows._pow2[shift]);
                }
                return Tuple.Create(hash1, hash2);
            }
        }

        private readonly RollingHashPowers _rhp;

        // Creates a finder. When longestLength is positive, the powers needed for
        // strings up to that length are computed up front; otherwise they are
        // computed on first use.
        public RollingHash(int longestLength = 0)
        {
            _rhp = new RollingHashPowers();
            if (longestLength > 0)
            {
                _rhp.EnsureLength(longestLength);
            }
        }

        // Returns the longest substring that occurs in both a and b.
        //
        // Parameters:
        //   a, b            - the strings to compare.
        //   caseInsensitive - when true, ASCII letters A-Z are treated as a-z while
        //                     hashing; other characters are still compared exactly.
        // Returns:
        //   The matching substring as it appears in b (original casing preserved), or
        //   an empty string when no common substring was found.
        // Throws:
        //   ArgumentNullException when a or b is null.
        public string FindCommonSubstring(string a, string b, bool caseInsensitive = false)
        {
            if (a == null)
            {
                throw new ArgumentNullException(nameof(a));
            }
            if (b == null)
            {
                throw new ArgumentNullException(nameof(b));
            }

            // Calculate max needed power of base:
            int mxPow = Math.Max(a.Length, b.Length);
            _rhp.EnsureLength(mxPow);
            // Create hashing objects from strings:
            RollingHashedString hash_a = new RollingHashedString(_rhp, a, caseInsensitive);
            RollingHashedString hash_b = new RollingHashedString(_rhp, b, caseInsensitive);

            // Binary search by length of same subsequence.
            // Invariant: a common substring of length `low` is known (or low == 0),
            // and none of length `high` exists.
            int pos = -1;
            int low = 0;
            int minLen = Math.Min(a.Length, b.Length);
            int high = minLen + 1;
            var tupleCompare = Comparer<Tuple<int, long>>.Default;
            while (high - low > 1)
            {
                int mid = (low + high) / 2;
                // Collect and sort the hashes of every window of length mid in a ...
                List<Tuple<int, long>> hashes = new List<Tuple<int, long>>(a.Length - mid + 1);
                for (int i = 0; i + mid <= a.Length; ++i)
                {
                    hashes.Add(hash_a.Apply(i, mid, mxPow));
                }
                hashes.Sort(tupleCompare);
                // ... then look for the first window of b whose hash is among them.
                int p = -1;
                for (int i = 0; i + mid <= b.Length; ++i)
                {
                    if (hashes.BinarySearch(hash_b.Apply(i, mid, mxPow), tupleCompare) >= 0)
                    {
                        p = i;
                        break;
                    }
                }
                if (p >= 0)
                {
                    low = mid;
                    pos = p;
                }
                else
                {
                    high = mid;
                }
            }
            // Output answer:
            return pos >= 0
                ? b.Substring(pos, low)
                : string.Empty;
        }
    }
    

    【讨论】:

      猜你喜欢
      • 1970-01-01
      • 2013-09-12
      • 2015-06-24
      • 2012-12-28
      • 1970-01-01
      • 1970-01-01
      • 1970-01-01
      相关资源
      最近更新 更多