# accuracytest.py
import xlrd

from utils.fileio import load_location, load_data_purpose_split, list_to_excel
  3. def test_recall_accuracy(suspected_node_list, source):
  4. """
  5. 查全率=(检索出的相关信息量/系统中的相关信息总量)
  6. :param suspected_node_list:
  7. :param source:
  8. :return:
  9. """
  10. location_dict = load_location("项目校对表-旧.xlsx")
  11. location_num = len(location_dict.keys())
  12. recall_location = 0
  13. recall_accurate = 0
  14. print("准确的结果如下:")
  15. for node in suspected_node_list:
  16. if node.private_info is None:
  17. node.private_info = [(key_word[0], node.purpose) for key_word in node.private_word_list]
  18. print((node.file_path.replace(source + '\\', ''), node.line_no, node.private_info,node.purpose))
  19. if (node.file_path.replace(source + '\\', '').replace("\\", '/'), node.line_no) in location_dict.keys():
  20. recall_location += 1
  21. # print(node)
  22. # print(location_dict[(node.file_path.replace(source + '/', ''), node.line_no)])
  23. # print()
  24. if node.private_info == location_dict[
  25. (node.file_path.replace(source + '\\', '').replace("\\", '/'), node.line_no)]:
  26. recall_accurate += 1
  27. if recall_location>0:
  28. print("查全率为: ", recall_accurate, "/", recall_location, '/', location_num, '/', recall_location / location_num)
  29. print("查准率为: ", recall_accurate, "/", recall_location, '/', len(suspected_node_list), '/',
  30. recall_location / len(suspected_node_list))
  31. return {"recall_accurate": recall_accurate, "recall_location": recall_location, "location_num": location_num}
  32. def test_missed(suspected_node_list, source):
  33. location_dict = load_location("项目校对表-旧.xlsx")
  34. paths = [(node.file_path.replace(source + "\\", '').replace("\\", "/"), node.line_no) for node in
  35. suspected_node_list]
  36. res = []
  37. for node in location_dict.keys():
  38. if node not in paths:
  39. # print("未命中:" + node[0] + str(node[1]))
  40. res.append("未命中:" + node[0] + str(node[1]))
  41. else:
  42. # print("命中:" + node[0] + str(node[1]))
  43. res.append("命中:" + node[0] + str(node[1]))
  44. paths = [paths[0],paths[1],]
  45. return {"suspected_node_list": paths, "missed": res}
  46. def test_stamp(stamp):
  47. # print(stamp)
  48. data_type_compute = []
  49. purpose_compute = []
  50. for st in stamp:
  51. loc = st[0] + ' ' + str(st[1])
  52. datatype = list(set([data[0] for data in st[2] if data[0] != 'Data']))
  53. purpose = list(set([data[1] for data in st[2] if data[1] != 'Usage']))
  54. for dt in datatype:
  55. data_type_compute.append((loc, dt))
  56. for pur in purpose:
  57. purpose_compute.append((loc, pur))
  58. data_type_list, purpose_list = load_data_purpose_split("/Users/liufan/program/PYTHON/SAP/privacyScanLsn/项目校对表-旧.xlsx")
  59. for data in data_type_list:
  60. if data not in data_type_compute:
  61. print(data)
  62. for pur in purpose_list:
  63. if pur not in purpose_compute:
  64. print(pur)
  65. data_type_all = list(set(data_type_compute + data_type_list))
  66. purpose_all = list(set(purpose_list + purpose_compute))
  67. print("data_type准确率为: ", len(data_type_compute), "/", len(data_type_all),
  68. len(data_type_compute) / len(data_type_all))
  69. print("purpose准确率为: ", len(purpose_compute), "/", len(purpose_all),
  70. len(purpose_compute) / len(purpose_all))
  71. list_to_excel(r'analyze/output/cmdb-python-master-标准.xls', data_type_all, purpose_all)
  72. if __name__ == '__main__':
  73. # load_location("项目校对表.xlsx")
  74. test_stamp()