效果1:
去掉字符串中回车,进行改进后效果:
代码:
Function LevenshteinDistance(s As String, t As String) As Integer
Dim d() As Integer
Dim i As Integer
Dim j As Integer
Dim cost As Integer
Dim sLen As Integer
Dim tLen As Integer
sLen = Len(s)
tLen = Len(t)
ReDim d(sLen, tLen)
For i = 0 To sLen
d(i, 0) = i
Next i
For j = 0 To tLen
d(0, j) = j
Next j
For i = 1 To sLen
For j = 1 To tLen
If mid(s, i, 1) = mid(t, j, 1) Then
cost = 0
Else
cost = 1
End If
d(i, j) = GetMinValue(GetMinValue(d(i - 1, j) + 1, d(i, j - 1) + 1), d(i - 1, j - 1) + cost)
Next j
Next i
LevenshteinDistance = d(sLen, tLen)
End Function
Function GetMinValue(ByVal Num1, ByVal Num2)
Dim MinValue As Double
MinValue = Num1
If Num2 < MinValue Then MinValue = Num2
GetMinValue = MinValue
End Function
Function similarity1(s As String, t As String) As Double
Dim maxLen As Integer
Dim dist As Integer
If Len(s) > Len(t) Then
maxLen = Len(s)
Else
maxLen = Len(t)
End If
If maxLen = 0 Then
similarity1 = 1# ' 如果两个字符串都为空,视为完全相似
Exit Function
End If
dist = LevenshteinDistance(s, t)
similarity1 = 1# - (dist / maxLen)
End Function
Sub TestSimilarity()
Dim str1 As String
Dim str2 As String
Dim similarity As Double
str1 = ActiveDocument.Content.Paragraphs(1).Range.text
str2 = ActiveDocument.Content.Paragraphs(3).Range.text
str1 = Replace(str1, vbCr, "")
str2 = Replace(str2, vbCr, "")
similarity = similarity1(str1, str2)
MsgBox "文本相似度: " & Format(similarity, "0.00%")
End Sub