123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960 |
- from datasets import Datasets
- import networkx as nx
- import matplotlib.pyplot as plt
- # Similarity threshold: two IPs are linked when their Jaccard similarity is >= N
- N = 0.5
- # Minimum number of IPs a group (connected component) must contain to be reported
- M = 3
- # Minimum number of attack targets an IP must have to be included in the analysis
- R = 2
# Jaccard coefficient (size of the intersection over size of the union)
def get_len(d1, d2):
    """Return the Jaccard similarity between the key sets of two mappings.

    The similarity is ``len(A & B) / len(A | B)`` where ``A`` and ``B`` are
    the keys of ``d1`` and ``d2``.

    Args:
        d1: First mapping; only its keys are used. Any iterable of hashable
            items is also accepted.
        d2: Second mapping; only its keys are used. Any iterable of hashable
            items is also accepted.

    Returns:
        A float between 0.0 and 1.0. When both inputs are empty the union is
        empty and the ratio is undefined; 0.0 is returned in that case so
        that two empty inputs are never treated as similar.
    """
    keys1 = set(d1)
    keys2 = set(d2)
    union = keys1 | keys2
    if not union:
        # Both inputs are empty: avoid ZeroDivisionError.
        return 0.0
    return len(keys1 & keys2) / len(union)
def main():
    """Group IPs with similar target sets and report/plot the groups.

    Loads the per-IP data with ``Datasets.load_secrepo()``, keeps the IPs
    that have at least ``R`` entries, and connects every pair of those IPs
    whose ``get_len`` similarity is at least ``N``. Each connected component
    with at least ``M`` nodes is printed, then the whole graph is drawn with
    matplotlib.
    """
    # Function-scope import so the block does not depend on a file-level one.
    from itertools import combinations

    ip_list = Datasets.load_secrepo()
    G = nx.Graph()

    # Only IPs with at least R entries take part in the comparison.
    good_ips = [ip for ip in ip_list if len(ip_list[ip]) >= R]

    # The similarity is symmetric and the graph is undirected, so every
    # unordered pair is scored exactly once, and only for qualifying IPs.
    for ip1, ip2 in combinations(good_ips, 2):
        weight = get_len(ip_list[ip1], ip_list[ip2])
        if weight >= N:
            # add_edge creates the nodes if they do not exist yet.
            G.add_edge(ip1, ip2, weight=weight)

    # One subgraph view per connected component of the similarity graph.
    sub_graphs = [G.subgraph(c) for c in nx.connected_components(G)]

    # Only report groups that contain at least M IPs.
    for i, sub_graph in enumerate(sub_graphs):
        n_nodes = len(sub_graph.nodes())
        if n_nodes >= M:
            print("Subgraph {0} has {1} nodes {2}".format(i, n_nodes, sub_graph.nodes()))

    nx.draw(G)
    plt.show()
- # Run the analysis only when this file is executed as a script, not on import.
- if __name__ == "__main__":
- main()
|