# 13-4-Graph-algorithm-recognise-Botnet.py
from itertools import combinations

import matplotlib.pyplot as plt
import networkx as nx

from datasets import Datasets
  4. # 相似度
  5. N = 0.5
  6. # 黑客团伙IP最少个数
  7. M = 3
  8. # 黑客IP攻击目标最小个数
  9. R = 2
  10. # jarccard系数(交集与并集的个数)
  11. def get_len(d1, d2):
  12. ds1 = set()
  13. for d in d1.keys():
  14. ds1.add(d)
  15. ds2 = set()
  16. for d in d2.keys():
  17. ds2.add(d)
  18. return len(ds1 & ds2) / len(ds1 | ds2)
  19. def main():
  20. ip_list = Datasets.load_secrepo()
  21. good_ip_list = {}
  22. G = nx.Graph()
  23. # 攻击的域名超过R的IP才列入统计范围
  24. for ip in ip_list.keys():
  25. if len(ip_list[ip]) >= R:
  26. good_ip_list[ip] = 1
  27. # 满足阈值的IP导入图数据库
  28. for ip1 in ip_list.keys():
  29. for ip2 in ip_list.keys():
  30. if not ip1 == ip2:
  31. weight = get_len(ip_list[ip1], ip_list[ip2])
  32. if (weight >= N) and (ip1 in good_ip_list.keys()) and (ip2 in good_ip_list.keys()):
  33. # 点不存在会自动添加
  34. G.add_edge(ip1, ip2, weight=weight)
  35. # 连通分量数目
  36. n_sub_graphs = nx.number_connected_components(G)
  37. # 最大连通子图
  38. sub_graphs = list(G.subgraph(c) for c in nx.connected_components(G))
  39. # 当同一团伙的IP大于等于M时才显示结果
  40. for i, sub_graph in enumerate(sub_graphs):
  41. n_nodes = len(sub_graph.nodes())
  42. if n_nodes >= M:
  43. print("Subgraph {0} has {1} nodes {2}".format(i, n_nodes, sub_graph.nodes()))
  44. nx.draw(G)
  45. plt.show()
  46. if __name__ == "__main__":
  47. main()