123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418 |
- Option Explicit
- Type RankInfo
- Offset As Integer
- Percentage As Single
- End Type
- Function FuzzyPercent(ByVal String1 As String, _
- ByVal String2 As String, _
- Optional Algorithm As Integer = 3, _
- Optional Normalised As Boolean = False) As Single
- '*************************************
- '** Return a % match on two strings **
- '*************************************
- Dim intLen1 As Integer, intLen2 As Integer
- Dim intCurLen As Integer
- Dim intTo As Integer
- Dim intPos As Integer
- Dim intPtr As Integer
- Dim intScore As Integer
- Dim intTotScore As Integer
- Dim intStartPos As Integer
- Dim strWork As String
- '-------------------------------------------------------
- '-- If strings havent been normalised, normalise them --
- '-------------------------------------------------------
- If Normalised = False Then
- String1 = LCase$(Application.Trim(String1))
- String2 = LCase$(Application.Trim(String2))
- End If
- '----------------------------------------------
- '-- Give 100% match if strings exactly equal --
- '----------------------------------------------
- If String1 = String2 Then
- FuzzyPercent = 1
- Exit Function
- End If
- intLen1 = Len(String1)
- intLen2 = Len(String2)
- '----------------------------------------
- '-- Give 0% match if string length < 2 --
- '----------------------------------------
- If intLen1 < 2 Then
- FuzzyPercent = 0
- Exit Function
- End If
- intTotScore = 0 'initialise total possible score
- intScore = 0 'initialise current score
- 'Taken from: http://www.mrexcel.com/forum/excel-questions/195635-fuzzy-matching-new-version-plus-explanation.html
- '--------------------------------------------------------
- '-- If Algorithm = 1 or 3, Search for single characters --
- '--------------------------------------------------------
- If (Algorithm And 1) <> 0 Then
- FuzzyAlg1 String1, String2, intScore, intTotScore
- If intLen1 < intLen2 Then FuzzyAlg1 String2, String1, intScore, intTotScore
- End If
- '-----------------------------------------------------------
- '-- If Algorithm = 2 or 3, Search for pairs, triplets etc. --
- '-----------------------------------------------------------
- If (Algorithm And 2) <> 0 Then
- FuzzyAlg2 String1, String2, intScore, intTotScore
- If intLen1 < intLen2 Then FuzzyAlg2 String2, String1, intScore, intTotScore
- End If
- FuzzyPercent = intScore / intTotScore
- End Function
- Private Sub FuzzyAlg1(ByVal String1 As String, _
- ByVal String2 As String, _
- ByRef Score As Integer, _
- ByRef TotScore As Integer)
- Dim intLen1 As Integer, intPos As Integer, intPtr As Integer, intStartPos As Integer
- intLen1 = Len(String1)
- TotScore = TotScore + intLen1 'update total possible score
- intPos = 0
- For intPtr = 1 To intLen1
- intStartPos = intPos + 1
- intPos = InStr(intStartPos, String2, Mid$(String1, intPtr, 1))
- If intPos > 0 Then
- If intPos > intStartPos + 3 Then 'No match if char is > 3 bytes away
- intPos = intStartPos
- Else
- Score = Score + 1 'Update current score
- End If
- Else
- intPos = intStartPos
- End If
- Next intPtr
- End Sub
- Private Sub FuzzyAlg2(ByVal String1 As String, _
- ByVal String2 As String, _
- ByRef Score As Integer, _
- ByRef TotScore As Integer)
- Dim intCurLen As Integer, intLen1 As Integer, intTo As Integer, intPtr As Integer, intPos As Integer
- Dim strWork As String
- intLen1 = Len(String1)
- For intCurLen = 2 To intLen1
- strWork = String2 'Get a copy of String2
- intTo = intLen1 - intCurLen + 1
- TotScore = TotScore + Int(intLen1 / intCurLen) 'Update total possible score
- For intPtr = 1 To intTo Step intCurLen
- intPos = InStr(strWork, Mid$(String1, intPtr, intCurLen))
- If intPos > 0 Then
- Mid$(strWork, intPos, intCurLen) = String$(intCurLen, &H0) 'corrupt found string
- Score = Score + 1 'Update current score
- End If
- Next intPtr
- Next intCurLen
- End Sub
- Function FuzzyVLookup(ByVal LookupValue As String, _
- ByVal TableArray As Range, _
- ByVal IndexNum As Integer, _
- Optional NFPercent As Single = 0.05, _
- Optional Rank As Integer = 1, _
- Optional Algorithm As Integer = 3, _
- Optional AdditionalCols As Integer = 0) As Variant
- '********************************************************************************
- '** Function to Fuzzy match LookupValue with entries in **
- '** column 1 of table specified by TableArray. **
- '** TableArray must specify the top left cell of the range to be searched **
- '** The function stops scanning the table when an empty cell in column 1 **
- '** is found. **
- '** For each entry in column 1 of the table, FuzzyPercent is called to match **
- '** LookupValue with the Table entry. **
- '** 'Rank' is an optional parameter which may take any value > 0 **
- '** (default 1) and causes the function to return the 'nth' best **
- '** match (where 'n' is defined by 'Rank' parameter) **
- '** If the 'Rank' match percentage < NFPercent (Default 5%), #N/A is returned. **
- '** IndexNum is the column number of the entry in TableArray required to be **
- '** returned, as follows: **
- '** If IndexNum > 0 and the 'Rank' percentage match is >= NFPercent **
- '** (Default 5%) the column entry indicated by IndexNum is **
- '** returned. **
- '** if IndexNum = 0 and the 'Rank' percentage match is >= NFPercent **
- '** (Default 5%) the offset row (starting at 1) is returned. **
- '** This value can be used directly in the 'Index' function. **
- '** **
- '** Algorithm can take one of the following values: **
- '** Algorithm = 1: **
- '** This algorithm is best suited for matching mis-spellings. **
- '** For each character in 'String1', a search is performed on 'String2'. **
- '** The search is deemed successful if a character is found in 'String2' **
- '** within 3 characters of the current position. **
- '** A score is kept of matching characters which is returned as a **
- '** percentage of the total possible score. **
- '** Algorithm = 2: **
- '** This algorithm is best suited for matching sentences, or **
- '** 'firstname lastname' compared with 'lastname firstname' combinations **
- '** A count of matching pairs, triplets, quadruplets etc. in 'String1' and **
- '** 'String2' is returned as a percentage of the total possible. **
- '** Algorithm = 3: Both Algorithms 1 and 2 are performed. **
- '********************************************************************************
- Dim r As Range
- Dim strListString As String
- Dim strWork As String
- Dim sngMinPercent As Single
- Dim sngWork As Single
- Dim sngCurPercent As Single
- Dim intBestMatchPtr As Integer
- Dim intRankPtr As Integer
- Dim intRankPtr1 As Integer
- Dim i As Integer
- Dim lEndRow As Long
- Dim udRankData() As RankInfo
- Dim vCurValue As Variant
- '--------------------------------------------------------------
- '-- Validation --
- '--------------------------------------------------------------
- LookupValue = LCase$(Application.Trim(LookupValue))
- If IsMissing(NFPercent) Then
- sngMinPercent = 0.05
- Else
- If (NFPercent <= 0) Or (NFPercent > 1) Then
- FuzzyVLookup = "*** 'NFPercent' must be a percentage > zero ***"
- Exit Function
- End If
- sngMinPercent = NFPercent
- End If
- If Rank < 1 Then
- FuzzyVLookup = "*** 'Rank' must be an integer > 0 ***"
- Exit Function
- End If
- ReDim udRankData(1 To Rank)
- lEndRow = TableArray.Rows.Count
- If VarType(TableArray.Cells(lEndRow, 1).value) = vbEmpty Then
- lEndRow = TableArray.Cells(lEndRow, 1).End(xlUp).Row
- End If
- '---------------
- '-- Main loop --
- '---------------
- For Each r In Range(TableArray.Cells(1, 1), TableArray.Cells(lEndRow, 1))
- vCurValue = ""
- For i = 0 To AdditionalCols
- vCurValue = vCurValue & r.Offset(0, i).Text
- Next i
- If VarType(vCurValue) = vbString Then
- strListString = LCase$(Application.Trim(vCurValue))
-
- '------------------------------------------------
- '-- Fuzzy match strings & get percentage match --
- '------------------------------------------------
- sngCurPercent = FuzzyPercent(String1:=LookupValue, _
- String2:=strListString, _
- Algorithm:=Algorithm, _
- Normalised:=True)
-
- If sngCurPercent >= sngMinPercent Then
- '---------------------------
- '-- Store in ranked array --
- '---------------------------
- For intRankPtr = 1 To Rank
- If sngCurPercent > udRankData(intRankPtr).Percentage Then
- For intRankPtr1 = Rank To intRankPtr + 1 Step -1
- With udRankData(intRankPtr1)
- .Offset = udRankData(intRankPtr1 - 1).Offset
- .Percentage = udRankData(intRankPtr1 - 1).Percentage
- End With
- Next intRankPtr1
- With udRankData(intRankPtr)
- .Offset = r.Row
- .Percentage = sngCurPercent
- End With
- Exit For
- End If
- Next intRankPtr
- End If
-
- End If
- Next r
- If udRankData(Rank).Percentage < sngMinPercent Then
- '--------------------------------------
- '-- Return '#N/A' if below NFPercent --
- '--------------------------------------
- FuzzyVLookup = CVErr(xlErrNA)
- Else
- intBestMatchPtr = udRankData(Rank).Offset - TableArray.Cells(1, 1).Row + 1
- If IndexNum > 0 Then
- '-----------------------------------
- '-- Return column entry specified --
- '-----------------------------------
- FuzzyVLookup = TableArray.Cells(intBestMatchPtr, IndexNum)
- Else
- '-----------------------
- '-- Return offset row --
- '-----------------------
- FuzzyVLookup = intBestMatchPtr
- End If
- End If
- End Function
- Function FuzzyHLookup(ByVal LookupValue As String, _
- ByVal TableArray As Range, _
- ByVal IndexNum As Integer, _
- Optional NFPercent As Single = 0.05, _
- Optional Rank As Integer = 1, _
- Optional Algorithm As Integer = 3) As Variant
- '********************************************************************************
- '** Function to Fuzzy match LookupValue with entries in **
- '** row 1 of table specified by TableArray. **
- '** TableArray must specify the top left cell of the range to be searched **
- '** The function stops scanning the table when an empty cell in row 1 **
- '** is found. **
- '** For each entry in row 1 of the table, FuzzyPercent is called to match **
- '** LookupValue with the Table entry. **
- '** 'Rank' is an optional parameter which may take any value > 0 **
- '** (default 1) and causes the function to return the 'nth' best **
- '** match (where 'n' is defined by 'Rank' parameter) **
- '** If the 'Rank' match percentage < NFPercent (Default 5%), #N/A is returned. **
- '** IndexNum is the row number of the entry in TableArray required to be **
- '** returned, as follows: **
- '** If IndexNum > 0 and the 'Rank' percentage match is >= NFPercent **
- '** (Default 5%) the row entry indicated by IndexNum is **
- '** returned. **
- '** if IndexNum = 0 and the 'Rank' percentage match is >= NFPercent **
- '** (Default 5%) the offset col (starting at 0) is returned. **
- '** This value can be used directly in the 'OffSet' function. **
- '** **
- '** Algorithm can take one of the following values: **
- '** Algorithm = 1: **
- '** For each character in 'String1', a search is performed on 'String2'. **
- '** The search is deemed successful if a character is found in 'String2' **
- '** within 3 characters of the current position. **
- '** A score is kept of matching characters which is returned as a **
- '** percentage of the total possible score. **
- '** Algorithm = 2: **
- '** A count of matching pairs, triplets, quadruplets etc. in 'String1' and **
- '** 'String2' is returned as a percentage of the total possible. **
- '** Algorithm = 3: Both Algorithms 1 and 2 are performed. **
- '********************************************************************************
- Dim r As Range
- Dim strListString As String
- Dim strWork As String
- Dim sngMinPercent As Single
- Dim sngWork As Single
- Dim sngCurPercent As Single
- Dim intBestMatchPtr As Integer
- Dim intPtr As Integer
- Dim intRankPtr As Integer
- Dim intRankPtr1 As Integer
- Dim iEndCol As Integer
- Dim udRankData() As RankInfo
- Dim vCurValue As Variant
- '--------------------------------------------------------------
- '-- Validation --
- '--------------------------------------------------------------
- LookupValue = LCase$(Application.Trim(LookupValue))
- If IsMissing(NFPercent) Then
- sngMinPercent = 0.05
- Else
- If (NFPercent <= 0) Or (NFPercent > 1) Then
- FuzzyHLookup = "*** 'NFPercent' must be a percentage > zero ***"
- Exit Function
- End If
- sngMinPercent = NFPercent
- End If
- If Rank < 1 Then
- FuzzyHLookup = "*** 'Rank' must be an integer > 0 ***"
- Exit Function
- End If
- ReDim udRankData(1 To Rank)
- '**************************
- iEndCol = TableArray.Columns.Count
- If VarType(TableArray.Cells(1, iEndCol).value) = vbEmpty Then
- iEndCol = TableArray.Cells(1, iEndCol).End(xlToLeft).Column
- End If
- '---------------
- '-- Main loop --
- '---------------
- For Each r In Range(TableArray.Cells(1, 1), TableArray.Cells(1, iEndCol))
- vCurValue = r.value
- If VarType(vCurValue) = vbString Then
- strListString = LCase$(Application.Trim(vCurValue))
-
- '------------------------------------------------
- '-- Fuzzy match strings & get percentage match --
- '------------------------------------------------
- sngCurPercent = FuzzyPercent(String1:=LookupValue, _
- String2:=strListString, _
- Algorithm:=Algorithm, _
- Normalised:=True)
-
- If sngCurPercent >= sngMinPercent Then
- '---------------------------
- '-- Store in ranked array --
- '---------------------------
- For intRankPtr = 1 To Rank
- If sngCurPercent > udRankData(intRankPtr).Percentage Then
- For intRankPtr1 = Rank To intRankPtr + 1 Step -1
- With udRankData(intRankPtr1)
- .Offset = udRankData(intRankPtr1 - 1).Offset
- .Percentage = udRankData(intRankPtr1 - 1).Percentage
- End With
- Next intRankPtr1
- With udRankData(intRankPtr)
- .Offset = r.Column
- .Percentage = sngCurPercent
- End With
- Exit For
- End If
- Next intRankPtr
- End If
-
- End If
- Next r
- If udRankData(Rank).Percentage < sngMinPercent Then
- '--------------------------------------
- '-- Return '#N/A' if below NFPercent --
- '--------------------------------------
- FuzzyHLookup = CVErr(xlErrNA)
- Else
- intBestMatchPtr = udRankData(Rank).Offset - TableArray.Cells(1, 1).Column + 1
- If IndexNum > 0 Then
- '-----------------------------------
- '-- Return row entry specified --
- '-----------------------------------
- FuzzyHLookup = TableArray.Cells(IndexNum, intBestMatchPtr)
- Else
- '-----------------------
- '-- Return offset col --
- '-----------------------
- FuzzyHLookup = intBestMatchPtr
- End If
- End If
- End Function
|