charactermatch.py 2.6 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091
  1. import copy
  2. import difflib
  3. # import Levenshtein
  4. # duplicated
  5. def character_match_abbr(word_std, abbr, word):
  6. if word.find(word_std) != -1:
  7. return True
  8. while word.find(abbr[0]) != -1 and word.find(abbr[0]) + 3 <= len(word):
  9. word = word[word.find(abbr[0]):]
  10. copy_abbr = copy.deepcopy(abbr)
  11. flag = True
  12. for i in range(3):
  13. index = copy_abbr.find(word[0])
  14. if index == -1:
  15. flag = False
  16. break
  17. else:
  18. copy_abbr = copy_abbr[index:]
  19. word = word[1:]
  20. if flag:
  21. return True
  22. else:
  23. continue
  24. return False
  25. def character_match(word_std, word):
  26. """
  27. 模糊匹配
  28. Args:
  29. word_std:
  30. word:
  31. Returns:
  32. script_path
  33. """
  34. word, word_std = word.lower().replace("_", ""), word_std.lower()
  35. if word.find(word_std) != -1 or difflib.SequenceMatcher((lambda x: x in ["_", "/"]), word, word_std).ratio() > 0.9:
  36. return True
  37. else:
  38. return False
  39. def word_match(word_std_list, word):
  40. """
  41. Args:
  42. word_std_list: 可能的缩写类型
  43. word: 查询的单词
  44. Returns:
  45. True/False
  46. """
  47. if "ip" in word_std_list:
  48. word_std_list.remove("ip")
  49. if word == "ip" or word == 'IP' or word == 'Ip':
  50. return True
  51. for word_std in word_std_list:
  52. if character_match(word_std, word):
  53. return True
  54. else:
  55. continue
  56. return False
  57. def test_match(a, b):
  58. print(b.find(a) != -1)
  59. print(difflib.SequenceMatcher((lambda x: x in ["_", "/"]), a, b).ratio())
  60. print()
  61. if __name__ == '__main__':
  62. # print(word_match(["password", "pwd", "psw", "pswd"], "psd"))
  63. # print(word_match(["password", "pwd", "psw", "pswd"], "userpwd"))
  64. # print(word_match(["password", "pwd", "psw", "pswd"], "user_psw_1"))
  65. # print(word_match(["password", "pwd", "psw", "pswd"], "pwa"))
  66. # print(word_match(["password", "pwd", "psw", "pswd"], "passw"))
  67. # print(word_match(["password", "pwd", "psw", "pswd"], "passpsw"))
  68. # print(word_match(["password", "pwd", "psw", "pswd"], "user_password_a"))
  69. # print(word_match(["password", "pwd", "psw", "pswd"], "psw_a"))
  70. word_match(["pswd", "psw", "pwd", "password", "pass_word", "gitpass"], "gen_password")
  71. word_match(["key"], "gitkey")
  72. print(word_match(["Pseudonym", "alias"], "pseudonyms"))
  73. # word_match(["ipaddr", "IPAddress", "ip"], "output_dir")
  74. # word_match(["ipaddr", "IPAddress", "ip"], "os.path.pardir")
  75. # 包含+长度限制