我为您的问题提供了解决方案,并已上传包含您的示例字符串对的工作簿。这是workbook。
我的代码基于Needleman–Wunsch algorithm,它于 1970 年首次开发,至今仍用于在科学技术中对齐 DNA 序列。但是我修改了算法并添加了额外的后处理来处理您的示例数据字符串对。
这里是如何工作的过程。在 A1 和 A2 中输入要比较的两个字符串。
按 Alt-F8 并运行宏,AlignStrings。
结果将显示在单元格 A5 和 A6 中。
请注意,其他示例字符串对可以在表格的下方找到,从单元格 A21 开始。
这是工作簿中完成字符串对对齐和突出显示差异的代码:
Public Sub AlignStrings()
Dim a() As Byte, b() As Byte, a_$, b_$, i&, j&, d&, u&, l&, x&, y&, f&()
Const GAP = -1
Const PAD = "_"
a = [a1].Text: b = [a2].Text
[a3:a6].Clear
[a1:a6].Font.Name = "Courier New"
ReDim f(0 To UBound(b) \ 2 + 1, 0 To UBound(a) \ 2 + 1)
For i = 1 To UBound(f, 1)
For j = 1 To UBound(f, 2)
x = j - 1: y = i - 1
If a(x * 2) = b(y * 2) Then
d = 1 + f(y, x)
u = 0 + f(y, j)
l = 0 + f(i, x)
Else
d = -1 + f(y, x)
u = GAP + f(y, j)
l = GAP + f(i, x)
End If
f(i, j) = Max(d, u, l)
Next
Next
i = UBound(f, 1): j = UBound(f, 2)
On Error Resume Next
Do
x = j - 1: y = i - 1
d = f(y, x)
u = f(y, j)
l = f(i, x)
Select Case True
Case Err
If y < 0 Then GoTo left Else GoTo up
Case d >= u And d >= l Or Mid$(a, j, 1) = Mid$(b, i, 1)
diag:
a_ = Mid$(a, j, 1) & a_
b_ = Mid$(b, i, 1) & b_
i = i - 1: j = j - 1
Case u > l
up:
a_ = PAD & a_
b_ = Mid$(b, i, 1) & b_
i = i - 1
Case l > u
left:
a_ = Mid$(a, j, 1) & a_
b_ = PAD & b_
j = j - 1
End Select
Loop Until i < 1 And j < 1
DecorateStrings a_, b_, [a5], [a6], PAD
End Sub
Private Function Max(a&, b&, c&) As Long
Max = a
If b > a Then Max = b
If c > b Then Max = c
End Function
Private Sub DecorateStrings(a$, b$, rOutA As Range, rOutB As Range, PAD$)
Dim i&, j&
FloatArtifacts a, b, PAD
FloatArtifacts b, a, PAD
rOutA = a
rOutB = b
For i = 1 To Len(a)
If Mid$(a, i, 1) <> Mid$(b, i, 1) Then
If Mid$(a, i, 1) <> PAD Then
rOutA.Characters(i, 1).Font.Color = vbRed
End If
End If
Next
For i = 1 To Len(b)
If Mid$(a, i, 1) <> Mid$(b, i, 1) Then
If Mid$(b, i, 1) <> PAD Then
rOutB.Characters(i, 1).Font.Color = vbRed
End If
End If
Next
End Sub
Private Sub FloatArtifacts(s1$, s2$, PAD$)
Dim c&, k&, i&, p&
For i = 1 To Len(s1)
c = InStr(i, s1, PAD)
If c Then
k = 0
Do
k = k + 1
If Mid$(s1, c + k, 1) <> PAD Then
If Mid$(s2, c, 1) = Mid$(s1, c + k, 1) Then
p = InStr(c + k, s1, PAD)
If p < (c + k + 6) And p > 0 Then
Mid$(s1, c, 1) = Mid$(s1, c + k, 1)
Mid$(s1, c + k, 1) = PAD
i = c
Exit Do
Else
i = c + k
Exit Do
End If
Else
i = c + k
Exit Do
End If
End If
If c + k > Len(s1) Then Exit Do
Loop
Else
Exit For
End If
Next
End Sub