编程语言
首页 > 编程语言 > 用VB实现字符串相似度算法(编辑距离算法 Levenshtein Distance)

用VB实现字符串相似度算法(编辑距离算法 Levenshtein Distance)

作者:互联网

原文来自Angel_Kitty《用C#实现字符串相似度算法(编辑距离算法 Levenshtein Distance)》

把代码翻译成了VB,具体描述请阅读作者的原文。

Public Class SearchHelper
    ''' <summary>
    ''' Filters <paramref name="items"/> down to the sentences that match the keywords in
    ''' <paramref name="param"/> and returns them ordered best match first.
    ''' <br/>The raw similarity is not used as the sort key because it ignores sentence length.
    ''' Well-matched, short sentences should come first, so the sort key is
    ''' (unmatched character count + 0.5) / sentence length, with ties broken by the sentence text.
    ''' </summary>
    ''' <param name="param">Query text; keywords are separated by spaces or ideographic spaces (U+3000).</param>
    ''' <param name="items">Candidate sentences to search. Nothing entries are skipped.</param>
    ''' <returns>The matching sentences in ranked order; an empty array when there is nothing to search.</returns>
    Public Function Search(param As String, items As String()) As String()
        ' Any single one of these conditions means there is nothing to search, hence OrElse.
        If String.IsNullOrWhiteSpace(param) OrElse items Is Nothing OrElse items.Length = 0 Then
            Return New String() {}
        End If

        ' VB has no "\u3000" escape sequence; ChrW(&H3000) is the ideographic space.
        Dim separators As Char() = {" "c, ChrW(&H3000)}
        Dim words As String() = param.Split(separators, StringSplitOptions.RemoveEmptyEntries).OrderBy(Function(item) item.Length).ToArray()
        If words.Length = 0 Then
            Return New String() {}
        End If

        Dim q = From sentence In items.AsParallel()
                Where sentence IsNot Nothing
                Let MLL = Mul_LnCS_Length(sentence, words)
                Where MLL >= 0
                Order By (MLL + 0.5) / sentence.Length, sentence
                Select sentence
        Return q.ToArray()
    End Function

    ''' <summary>
    ''' Matches every keyword against the sentence using a longest-common-subsequence (LCS) table
    ''' and counts how many sentence characters are left unmatched by all keywords together.
    ''' </summary>
    ''' <param name="sentence">The sentence to test. Must not be Nothing.</param>
    ''' <param name="words">The keywords. Must contain at least one element and no Nothing entries.</param>
    ''' <returns>
    ''' The number of sentence characters not covered by any keyword's LCS, or -1 when a keyword's
    ''' LCS length is not greater than half of the span it occupies in the sentence.
    ''' </returns>
    Public Function Mul_LnCS_Length(sentence As String, words As String()) As Integer
        Dim sLength As Integer = sentence.Length
        Dim result As Integer = sLength

        ' VB array declarations take the upper bound, not the element count.
        Dim flags(sLength - 1) As Boolean

        ' Size the table by the longest keyword so the order of the keywords does not matter.
        ' Row 0 and column 0 are never written and stay 0; every other cell a keyword reads is
        ' rewritten for that keyword first, so one table can be shared by all keywords.
        Dim maxWordLength As Integer = words.Max(Function(w) w.Length)
        Dim C(sLength, maxWordLength) As Integer

        For Each word As String In words
            Dim wLength As Integer = word.Length
            Dim first As Integer = 0
            Dim last As Integer = 0

            ' Fill the LCS table. While filling, "first" holds the largest prefix LCS value that a
            ' match extended so far and "last" the sentence index where that happened.
            For row As Integer = 0 To sLength - 1
                For col As Integer = 0 To wLength - 1
                    If sentence(row) = word(col) Then
                        C(row + 1, col + 1) = C(row, col) + 1
                        If first < C(row, col) Then
                            last = row
                            first = C(row, col)
                        End If
                    Else
                        C(row + 1, col + 1) = Math.Max(C(row, col + 1), C(row + 1, col))
                    End If
                Next
            Next

            ' The LCS length of the whole sentence and the whole keyword is the bottom-right cell.
            Dim LCS_L As Integer = C(sLength, wLength)
            ' NOTE(review): the C# code this was translated from may also reject the keyword here
            ' when LCS_L <= wLength >> 1 - confirm against the original article.

            ' Walk back through the table and flag the sentence characters that take part in the LCS.
            ' The diagonal step is only valid when the two characters really are equal.
            Dim i As Integer = sLength
            Dim j As Integer = wLength
            While i > 0 AndAlso j > 0
                If sentence(i - 1) = word(j - 1) Then
                    i -= 1
                    j -= 1
                    If Not flags(i) Then
                        flags(i) = True
                        result -= 1
                    End If
                    ' From here on "first" is the smallest matched sentence index seen so far.
                    first = i
                ElseIf C(i - 1, j) = C(i, j) Then
                    i -= 1
                Else
                    j -= 1
                End If
            End While

            ' Reject when the LCS is not longer than half of the span between "first" and "last".
            If LCS_L <= (last - first + 1) >> 1 Then
                Return -1
            End If
        Next

        Return result
    End Function

    ''' <summary>
    ''' Computes the Levenshtein edit distance between two strings: the minimum number of
    ''' single-character insertions, deletions and substitutions that turn one into the other.
    ''' </summary>
    ''' <param name="str1">First string; Nothing is treated as an empty string.</param>
    ''' <param name="str2">Second string; Nothing is treated as an empty string.</param>
    ''' <returns>The edit distance.</returns>
    Public Function Distance(str1 As String, str2 As String) As Integer
        If str1 Is Nothing Then str1 = String.Empty
        If str2 Is Nothing Then str2 = String.Empty

        Dim n As Integer = str1.Length
        Dim m As Integer = str2.Length

        ' Upper bounds n and m give an (n + 1) x (m + 1) table.
        Dim C(n, m) As Integer

        ' Distance from or to an empty prefix is the length of the other prefix.
        For i As Integer = 0 To n
            C(i, 0) = i
        Next
        For j As Integer = 1 To m
            C(0, j) = j
        Next

        For i As Integer = 0 To n - 1
            For j As Integer = 0 To m - 1
                Dim deletion As Integer = C(i, j + 1) + 1
                Dim insertion As Integer = C(i + 1, j) + 1
                Dim substitution As Integer = C(i, j)
                If str1(i) <> str2(j) Then
                    substitution += 1
                End If
                C(i + 1, j + 1) = Math.Min(Math.Min(deletion, insertion), substitution)
            Next
        Next

        Return C(n, m)
    End Function
End Class

 

标签:Distance,VB,End,String,Dim,sentence,Length,算法,Integer
来源: https://www.cnblogs.com/ovbm/p/16299307.html