# 13-3-Graph-algorithm-recognise-WebShell.py

import os
import re

from neo4j import GraphDatabase, basic_auth
# Clear the database first so the import starts from an empty graph:
#   MATCH (n) DETACH DELETE n
# Import the data.
  5. def import_data(session):
  6. nodes = {}
  7. index = 1
  8. # 读取数据
  9. file_object = open('data/r-graph.txt', 'r')
  10. try:
  11. for line in file_object:
  12. match_obj = re.match(r'(\S+) -> (\S+)', line, re.M | re.I)
  13. if match_obj:
  14. path = match_obj.group(1)
  15. ref = match_obj.group(2)
  16. # 节点不存在则新建
  17. if path not in nodes.keys():
  18. path_node = "Page%d" % index
  19. nodes[path] = path_node
  20. sql = "create (%s:Page {url:\"%s\" , id:\"%d\",in:0,out:0})" % (path_node, path, index)
  21. index = index + 1
  22. session.run(sql)
  23. # print(sql)
  24. # 节点不存在新建
  25. if ref not in nodes.keys():
  26. ref_node = "Page%d" % index
  27. nodes[ref] = ref_node
  28. sql = "create (%s:Page {url:\"%s\",id:\"%d\",in:0,out:0})" % (ref_node, ref, index)
  29. index = index + 1
  30. session.run(sql)
  31. # print(sql)
  32. # 关联关系
  33. sql = "MATCH (out:Page {url:\"%s\"}), (in:Page {url:\"%s\"}) MERGE (out)-[:IN]->(in)" % (path, ref)
  34. # sql = "match (%s)-[:IN]->(%s)" % (path_node, ref_node)
  35. session.run(sql)
  36. # print(sql)
  37. # 出度
  38. sql = "match (n:Page {url:\"%s\"}) SET n.out=n.out+1" % path
  39. session.run(sql)
  40. # print(sql)
  41. # 入度
  42. sql = "match (n:Page {url:\"%s\"}) SET n.in=n.in+1" % ref
  43. session.run(sql)
  44. # print(sql)
  45. finally:
  46. file_object.close()
  47. def main():
  48. # 连接数据库
  49. driver = GraphDatabase.driver(
  50. "bolt://52.90.194.108:7687",
  51. auth=basic_auth("neo4j", "transmitters-amusements-saturdays"))
  52. session = driver.session()
  53. # 导入数据
  54. import_data(session)
  55. # 查询入度为1出度均为0的节点或者查询入度出度均为1且指向自己的节点
  56. sql = "MATCH (n:Page) where (n.in=1 and n.out=0) or (n.in=1 and n.out=1) RETURN n.url"
  57. results = session.run(sql)
  58. for result in results:
  59. print("疑是webshell: %s" % result["n.url"])
  60. # 关闭连接
  61. session.close()
# Run the import and detection only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()