AstGraph.py 3.5 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394
  1. import ast
  2. import os
  3. import random
  4. from graph.common.nodetype import *
  5. from graph.common.graphtype import *
  6. from utils.fileio import *
  7. CUR_PATH = os.path.dirname(__file__)
  8. i = 0
  9. def ast_visit(node, graph=None, id_list=None):
  10. if graph is None:
  11. graph = list()
  12. if id_list is None:
  13. id_list = [node]
  14. iter_field = ast.iter_fields(node)
  15. for field, value in iter_field:
  16. # print(type(node).__name__, field, value)
  17. if isinstance(value, list):
  18. for item in value:
  19. if isinstance(item, ast.AST):
  20. id_list.append(item)
  21. graph.append(
  22. (id_list.index(node) + 1, NodeType.node_map[type(node).__name__], 1,
  23. id_list.index(item) + 1, NodeType.node_map[type(item).__name__])
  24. )
  25. ast_visit(item, graph, id_list)
  26. elif isinstance(value, ast.AST):
  27. id_list.append(value)
  28. graph.append(
  29. (id_list.index(node) + 1, NodeType.node_map[type(node).__name__], 1,
  30. id_list.index(value) + 1, NodeType.node_map[type(value).__name__])
  31. )
  32. ast_visit(value, graph, id_list)
  33. def gen_graph_from_file(file_path):
  34. with open(file_path, 'r') as file:
  35. content = file.read()
  36. func_graph = list()
  37. try:
  38. root = ast.parse(content)
  39. for node in root.body:
  40. if isinstance(node, ast.FunctionDef):
  41. ast_visit(node, func_graph)
  42. except IndentationError:
  43. print("IndentationError: ", file_path)
  44. except SyntaxError:
  45. print("SyntaxError: ", file_path)
  46. except:
  47. print("other: ", file_path)
  48. return func_graph
  49. def gen_graph_to_txt():
  50. path_ori = CUR_PATH + "/../../data/purposeSplit"
  51. kinds = os.listdir(path_ori)
  52. kinds.remove('.DS_Store')
  53. for kind in kinds:
  54. graph_type = GraphType.type[kind]
  55. path_out_train = CUR_PATH + "/../../data/traindata/train/" + str(kind) + ".txt"
  56. path_out_test = CUR_PATH + "/../../data/traindata/test/" + str(kind) + ".txt"
  57. file_path_list = walk_files(path_ori + "/" + kind)
  58. random.shuffle(file_path_list)
  59. lens = int(len(file_path_list) / 4 * 3)
  60. with open(path_out_train, 'w') as file:
  61. for file_path in file_path_list[:lens]:
  62. func_graph = gen_graph_from_file(file_path)
  63. for edge in func_graph:
  64. file.write(
  65. "" + str(edge[0]) + " " + str(edge[1]) + " " + str(edge[2]) + " " + str(edge[3]) + " " + str(
  66. edge[
  67. 4]) + "\n")
  68. file.write("? " + str(graph_type) + " " + file_path.replace(path_ori + "/", "") + "\n\n")
  69. with open(path_out_test, 'w') as file:
  70. for file_path in file_path_list[lens: len(file_path_list)]:
  71. func_graph = gen_graph_from_file(file_path)
  72. for edge in func_graph:
  73. file.write(
  74. "" + str(edge[0]) + " " + str(edge[1]) + " " + str(edge[2]) + " " + str(edge[3]) + " " + str(
  75. edge[
  76. 4]) + "\n")
  77. file.write("? " + str(graph_type) + " " + file_path.replace(path_ori + "/", "") + "\n\n")
  78. if __name__ == '__main__':
  79. gen_graph_to_txt()
  80. # graph = gen_graph_from_file("/Users/liufan/program/PYTHON/sap2nd/GnnForPrivacyScan/data/purposeSplit/Directory/advance_touch_1.py", )
  81. # a = 5