FuzzyMatch.vba 17 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418
  1. Option Explicit
  2. Type RankInfo
  3. Offset As Integer
  4. Percentage As Single
  5. End Type
  6. Function FuzzyPercent(ByVal String1 As String, _
  7. ByVal String2 As String, _
  8. Optional Algorithm As Integer = 3, _
  9. Optional Normalised As Boolean = False) As Single
  10. '*************************************
  11. '** Return a % match on two strings **
  12. '*************************************
  13. Dim intLen1 As Integer, intLen2 As Integer
  14. Dim intCurLen As Integer
  15. Dim intTo As Integer
  16. Dim intPos As Integer
  17. Dim intPtr As Integer
  18. Dim intScore As Integer
  19. Dim intTotScore As Integer
  20. Dim intStartPos As Integer
  21. Dim strWork As String
  22. '-------------------------------------------------------
  23. '-- If strings havent been normalised, normalise them --
  24. '-------------------------------------------------------
  25. If Normalised = False Then
  26. String1 = LCase$(Application.Trim(String1))
  27. String2 = LCase$(Application.Trim(String2))
  28. End If
  29. '----------------------------------------------
  30. '-- Give 100% match if strings exactly equal --
  31. '----------------------------------------------
  32. If String1 = String2 Then
  33. FuzzyPercent = 1
  34. Exit Function
  35. End If
  36. intLen1 = Len(String1)
  37. intLen2 = Len(String2)
  38. '----------------------------------------
  39. '-- Give 0% match if string length < 2 --
  40. '----------------------------------------
  41. If intLen1 < 2 Then
  42. FuzzyPercent = 0
  43. Exit Function
  44. End If
  45. intTotScore = 0 'initialise total possible score
  46. intScore = 0 'initialise current score
  47. 'Taken from: http://www.mrexcel.com/forum/excel-questions/195635-fuzzy-matching-new-version-plus-explanation.html
  48. '--------------------------------------------------------
  49. '-- If Algorithm = 1 or 3, Search for single characters --
  50. '--------------------------------------------------------
  51. If (Algorithm And 1) <> 0 Then
  52. FuzzyAlg1 String1, String2, intScore, intTotScore
  53. If intLen1 < intLen2 Then FuzzyAlg1 String2, String1, intScore, intTotScore
  54. End If
  55. '-----------------------------------------------------------
  56. '-- If Algorithm = 2 or 3, Search for pairs, triplets etc. --
  57. '-----------------------------------------------------------
  58. If (Algorithm And 2) <> 0 Then
  59. FuzzyAlg2 String1, String2, intScore, intTotScore
  60. If intLen1 < intLen2 Then FuzzyAlg2 String2, String1, intScore, intTotScore
  61. End If
  62. FuzzyPercent = intScore / intTotScore
  63. End Function
  64. Private Sub FuzzyAlg1(ByVal String1 As String, _
  65. ByVal String2 As String, _
  66. ByRef Score As Integer, _
  67. ByRef TotScore As Integer)
  68. Dim intLen1 As Integer, intPos As Integer, intPtr As Integer, intStartPos As Integer
  69. intLen1 = Len(String1)
  70. TotScore = TotScore + intLen1 'update total possible score
  71. intPos = 0
  72. For intPtr = 1 To intLen1
  73. intStartPos = intPos + 1
  74. intPos = InStr(intStartPos, String2, Mid$(String1, intPtr, 1))
  75. If intPos > 0 Then
  76. If intPos > intStartPos + 3 Then 'No match if char is > 3 bytes away
  77. intPos = intStartPos
  78. Else
  79. Score = Score + 1 'Update current score
  80. End If
  81. Else
  82. intPos = intStartPos
  83. End If
  84. Next intPtr
  85. End Sub
  86. Private Sub FuzzyAlg2(ByVal String1 As String, _
  87. ByVal String2 As String, _
  88. ByRef Score As Integer, _
  89. ByRef TotScore As Integer)
  90. Dim intCurLen As Integer, intLen1 As Integer, intTo As Integer, intPtr As Integer, intPos As Integer
  91. Dim strWork As String
  92. intLen1 = Len(String1)
  93. For intCurLen = 2 To intLen1
  94. strWork = String2 'Get a copy of String2
  95. intTo = intLen1 - intCurLen + 1
  96. TotScore = TotScore + Int(intLen1 / intCurLen) 'Update total possible score
  97. For intPtr = 1 To intTo Step intCurLen
  98. intPos = InStr(strWork, Mid$(String1, intPtr, intCurLen))
  99. If intPos > 0 Then
  100. Mid$(strWork, intPos, intCurLen) = String$(intCurLen, &H0) 'corrupt found string
  101. Score = Score + 1 'Update current score
  102. End If
  103. Next intPtr
  104. Next intCurLen
  105. End Sub
  106. Function FuzzyVLookup(ByVal LookupValue As String, _
  107. ByVal TableArray As Range, _
  108. ByVal IndexNum As Integer, _
  109. Optional NFPercent As Single = 0.05, _
  110. Optional Rank As Integer = 1, _
  111. Optional Algorithm As Integer = 3, _
  112. Optional AdditionalCols As Integer = 0) As Variant
  113. '********************************************************************************
  114. '** Function to Fuzzy match LookupValue with entries in **
  115. '** column 1 of table specified by TableArray. **
  116. '** TableArray must specify the top left cell of the range to be searched **
  117. '** The function stops scanning the table when an empty cell in column 1 **
  118. '** is found. **
  119. '** For each entry in column 1 of the table, FuzzyPercent is called to match **
  120. '** LookupValue with the Table entry. **
  121. '** 'Rank' is an optional parameter which may take any value > 0 **
  122. '** (default 1) and causes the function to return the 'nth' best **
  123. '** match (where 'n' is defined by 'Rank' parameter) **
  124. '** If the 'Rank' match percentage < NFPercent (Default 5%), #N/A is returned. **
  125. '** IndexNum is the column number of the entry in TableArray required to be **
  126. '** returned, as follows: **
  127. '** If IndexNum > 0 and the 'Rank' percentage match is >= NFPercent **
  128. '** (Default 5%) the column entry indicated by IndexNum is **
  129. '** returned. **
  130. '** if IndexNum = 0 and the 'Rank' percentage match is >= NFPercent **
  131. '** (Default 5%) the offset row (starting at 1) is returned. **
  132. '** This value can be used directly in the 'Index' function. **
  133. '** **
  134. '** Algorithm can take one of the following values: **
  135. '** Algorithm = 1: **
  136. '** This algorithm is best suited for matching mis-spellings. **
  137. '** For each character in 'String1', a search is performed on 'String2'. **
  138. '** The search is deemed successful if a character is found in 'String2' **
  139. '** within 3 characters of the current position. **
  140. '** A score is kept of matching characters which is returned as a **
  141. '** percentage of the total possible score. **
  142. '** Algorithm = 2: **
  143. '** This algorithm is best suited for matching sentences, or **
  144. '** 'firstname lastname' compared with 'lastname firstname' combinations **
  145. '** A count of matching pairs, triplets, quadruplets etc. in 'String1' and **
  146. '** 'String2' is returned as a percentage of the total possible. **
  147. '** Algorithm = 3: Both Algorithms 1 and 2 are performed. **
  148. '********************************************************************************
  149. Dim r As Range
  150. Dim strListString As String
  151. Dim strWork As String
  152. Dim sngMinPercent As Single
  153. Dim sngWork As Single
  154. Dim sngCurPercent As Single
  155. Dim intBestMatchPtr As Integer
  156. Dim intRankPtr As Integer
  157. Dim intRankPtr1 As Integer
  158. Dim i As Integer
  159. Dim lEndRow As Long
  160. Dim udRankData() As RankInfo
  161. Dim vCurValue As Variant
  162. '--------------------------------------------------------------
  163. '-- Validation --
  164. '--------------------------------------------------------------
  165. LookupValue = LCase$(Application.Trim(LookupValue))
  166. If IsMissing(NFPercent) Then
  167. sngMinPercent = 0.05
  168. Else
  169. If (NFPercent <= 0) Or (NFPercent > 1) Then
  170. FuzzyVLookup = "*** 'NFPercent' must be a percentage > zero ***"
  171. Exit Function
  172. End If
  173. sngMinPercent = NFPercent
  174. End If
  175. If Rank < 1 Then
  176. FuzzyVLookup = "*** 'Rank' must be an integer > 0 ***"
  177. Exit Function
  178. End If
  179. ReDim udRankData(1 To Rank)
  180. lEndRow = TableArray.Rows.Count
  181. If VarType(TableArray.Cells(lEndRow, 1).value) = vbEmpty Then
  182. lEndRow = TableArray.Cells(lEndRow, 1).End(xlUp).Row
  183. End If
  184. '---------------
  185. '-- Main loop --
  186. '---------------
  187. For Each r In Range(TableArray.Cells(1, 1), TableArray.Cells(lEndRow, 1))
  188. vCurValue = ""
  189. For i = 0 To AdditionalCols
  190. vCurValue = vCurValue & r.Offset(0, i).Text
  191. Next i
  192. If VarType(vCurValue) = vbString Then
  193. strListString = LCase$(Application.Trim(vCurValue))
  194. '------------------------------------------------
  195. '-- Fuzzy match strings & get percentage match --
  196. '------------------------------------------------
  197. sngCurPercent = FuzzyPercent(String1:=LookupValue, _
  198. String2:=strListString, _
  199. Algorithm:=Algorithm, _
  200. Normalised:=True)
  201. If sngCurPercent >= sngMinPercent Then
  202. '---------------------------
  203. '-- Store in ranked array --
  204. '---------------------------
  205. For intRankPtr = 1 To Rank
  206. If sngCurPercent > udRankData(intRankPtr).Percentage Then
  207. For intRankPtr1 = Rank To intRankPtr + 1 Step -1
  208. With udRankData(intRankPtr1)
  209. .Offset = udRankData(intRankPtr1 - 1).Offset
  210. .Percentage = udRankData(intRankPtr1 - 1).Percentage
  211. End With
  212. Next intRankPtr1
  213. With udRankData(intRankPtr)
  214. .Offset = r.Row
  215. .Percentage = sngCurPercent
  216. End With
  217. Exit For
  218. End If
  219. Next intRankPtr
  220. End If
  221. End If
  222. Next r
  223. If udRankData(Rank).Percentage < sngMinPercent Then
  224. '--------------------------------------
  225. '-- Return '#N/A' if below NFPercent --
  226. '--------------------------------------
  227. FuzzyVLookup = CVErr(xlErrNA)
  228. Else
  229. intBestMatchPtr = udRankData(Rank).Offset - TableArray.Cells(1, 1).Row + 1
  230. If IndexNum > 0 Then
  231. '-----------------------------------
  232. '-- Return column entry specified --
  233. '-----------------------------------
  234. FuzzyVLookup = TableArray.Cells(intBestMatchPtr, IndexNum)
  235. Else
  236. '-----------------------
  237. '-- Return offset row --
  238. '-----------------------
  239. FuzzyVLookup = intBestMatchPtr
  240. End If
  241. End If
  242. End Function
  243. Function FuzzyHLookup(ByVal LookupValue As String, _
  244. ByVal TableArray As Range, _
  245. ByVal IndexNum As Integer, _
  246. Optional NFPercent As Single = 0.05, _
  247. Optional Rank As Integer = 1, _
  248. Optional Algorithm As Integer = 3) As Variant
  249. '********************************************************************************
  250. '** Function to Fuzzy match LookupValue with entries in **
  251. '** row 1 of table specified by TableArray. **
  252. '** TableArray must specify the top left cell of the range to be searched **
  253. '** The function stops scanning the table when an empty cell in row 1 **
  254. '** is found. **
  255. '** For each entry in row 1 of the table, FuzzyPercent is called to match **
  256. '** LookupValue with the Table entry. **
  257. '** 'Rank' is an optional parameter which may take any value > 0 **
  258. '** (default 1) and causes the function to return the 'nth' best **
  259. '** match (where 'n' is defined by 'Rank' parameter) **
  260. '** If the 'Rank' match percentage < NFPercent (Default 5%), #N/A is returned. **
  261. '** IndexNum is the row number of the entry in TableArray required to be **
  262. '** returned, as follows: **
  263. '** If IndexNum > 0 and the 'Rank' percentage match is >= NFPercent **
  264. '** (Default 5%) the row entry indicated by IndexNum is **
  265. '** returned. **
  266. '** if IndexNum = 0 and the 'Rank' percentage match is >= NFPercent **
  267. '** (Default 5%) the offset col (starting at 0) is returned. **
  268. '** This value can be used directly in the 'OffSet' function. **
  269. '** **
  270. '** Algorithm can take one of the following values: **
  271. '** Algorithm = 1: **
  272. '** For each character in 'String1', a search is performed on 'String2'. **
  273. '** The search is deemed successful if a character is found in 'String2' **
  274. '** within 3 characters of the current position. **
  275. '** A score is kept of matching characters which is returned as a **
  276. '** percentage of the total possible score. **
  277. '** Algorithm = 2: **
  278. '** A count of matching pairs, triplets, quadruplets etc. in 'String1' and **
  279. '** 'String2' is returned as a percentage of the total possible. **
  280. '** Algorithm = 3: Both Algorithms 1 and 2 are performed. **
  281. '********************************************************************************
  282. Dim r As Range
  283. Dim strListString As String
  284. Dim strWork As String
  285. Dim sngMinPercent As Single
  286. Dim sngWork As Single
  287. Dim sngCurPercent As Single
  288. Dim intBestMatchPtr As Integer
  289. Dim intPtr As Integer
  290. Dim intRankPtr As Integer
  291. Dim intRankPtr1 As Integer
  292. Dim iEndCol As Integer
  293. Dim udRankData() As RankInfo
  294. Dim vCurValue As Variant
  295. '--------------------------------------------------------------
  296. '-- Validation --
  297. '--------------------------------------------------------------
  298. LookupValue = LCase$(Application.Trim(LookupValue))
  299. If IsMissing(NFPercent) Then
  300. sngMinPercent = 0.05
  301. Else
  302. If (NFPercent <= 0) Or (NFPercent > 1) Then
  303. FuzzyHLookup = "*** 'NFPercent' must be a percentage > zero ***"
  304. Exit Function
  305. End If
  306. sngMinPercent = NFPercent
  307. End If
  308. If Rank < 1 Then
  309. FuzzyHLookup = "*** 'Rank' must be an integer > 0 ***"
  310. Exit Function
  311. End If
  312. ReDim udRankData(1 To Rank)
  313. '**************************
  314. iEndCol = TableArray.Columns.Count
  315. If VarType(TableArray.Cells(1, iEndCol).value) = vbEmpty Then
  316. iEndCol = TableArray.Cells(1, iEndCol).End(xlToLeft).Column
  317. End If
  318. '---------------
  319. '-- Main loop --
  320. '---------------
  321. For Each r In Range(TableArray.Cells(1, 1), TableArray.Cells(1, iEndCol))
  322. vCurValue = r.value
  323. If VarType(vCurValue) = vbString Then
  324. strListString = LCase$(Application.Trim(vCurValue))
  325. '------------------------------------------------
  326. '-- Fuzzy match strings & get percentage match --
  327. '------------------------------------------------
  328. sngCurPercent = FuzzyPercent(String1:=LookupValue, _
  329. String2:=strListString, _
  330. Algorithm:=Algorithm, _
  331. Normalised:=True)
  332. If sngCurPercent >= sngMinPercent Then
  333. '---------------------------
  334. '-- Store in ranked array --
  335. '---------------------------
  336. For intRankPtr = 1 To Rank
  337. If sngCurPercent > udRankData(intRankPtr).Percentage Then
  338. For intRankPtr1 = Rank To intRankPtr + 1 Step -1
  339. With udRankData(intRankPtr1)
  340. .Offset = udRankData(intRankPtr1 - 1).Offset
  341. .Percentage = udRankData(intRankPtr1 - 1).Percentage
  342. End With
  343. Next intRankPtr1
  344. With udRankData(intRankPtr)
  345. .Offset = r.Column
  346. .Percentage = sngCurPercent
  347. End With
  348. Exit For
  349. End If
  350. Next intRankPtr
  351. End If
  352. End If
  353. Next r
  354. If udRankData(Rank).Percentage < sngMinPercent Then
  355. '--------------------------------------
  356. '-- Return '#N/A' if below NFPercent --
  357. '--------------------------------------
  358. FuzzyHLookup = CVErr(xlErrNA)
  359. Else
  360. intBestMatchPtr = udRankData(Rank).Offset - TableArray.Cells(1, 1).Column + 1
  361. If IndexNum > 0 Then
  362. '-----------------------------------
  363. '-- Return row entry specified --
  364. '-----------------------------------
  365. FuzzyHLookup = TableArray.Cells(IndexNum, intBestMatchPtr)
  366. Else
  367. '-----------------------
  368. '-- Return offset col --
  369. '-----------------------
  370. FuzzyHLookup = intBestMatchPtr
  371. End If
  372. End If
  373. End Function