AstGraph.py 3.4 KB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374757677787980818283848586878889909192
  1. import ast
  2. import os
  3. from graph.common.nodetype import *
  4. from graph.common.graphtype import *
  5. from utils.fileio import *
# Directory containing this module; the data paths used by gen_graph_to_txt
# are built relative to it.
CUR_PATH = os.path.dirname(__file__)
# NOTE(review): `i` is not referenced anywhere in the visible part of this
# file -- confirm whether anything still relies on it before removing.
i = 0
  8. def ast_visit(node, graph=None, id_list=None):
  9. if graph is None:
  10. graph = list()
  11. if id_list is None:
  12. id_list = [node]
  13. iter_field = ast.iter_fields(node)
  14. for field, value in iter_field:
  15. # print(type(node).__name__, field, value)
  16. if isinstance(value, list):
  17. for item in value:
  18. if isinstance(item, ast.AST):
  19. id_list.append(item)
  20. graph.append(
  21. (id_list.index(node) + 1, NodeType.node_map[type(node).__name__], 1,
  22. id_list.index(item) + 1, NodeType.node_map[type(item).__name__])
  23. )
  24. ast_visit(item, graph, id_list)
  25. elif isinstance(value, ast.AST):
  26. id_list.append(value)
  27. graph.append(
  28. (id_list.index(node) + 1, NodeType.node_map[type(node).__name__], 1,
  29. id_list.index(value) + 1, NodeType.node_map[type(value).__name__])
  30. )
  31. ast_visit(value, graph, id_list)
  32. def gen_graph_from_file(file_path):
  33. with open(file_path, 'r') as file:
  34. content = file.read()
  35. func_graph = list()
  36. try:
  37. root = ast.parse(content)
  38. for node in root.body:
  39. if isinstance(node, ast.FunctionDef):
  40. ast_visit(node, func_graph)
  41. except IndentationError:
  42. print("IndentationError: ", file_path)
  43. except SyntaxError:
  44. print("SyntaxError: ", file_path)
  45. except:
  46. print("other: ", file_path)
  47. return func_graph
  48. def gen_graph_to_txt():
  49. path_ori = CUR_PATH + "/../../data/purposeSplit"
  50. kinds = os.listdir(path_ori)
  51. kinds.remove('.DS_Store')
  52. for kind in kinds:
  53. graph_type = GraphType.type[kind]
  54. path_out_train = CUR_PATH + "/../../data/traindata/train/" + str(kind) + ".txt"
  55. path_out_test = CUR_PATH + "/../../data/traindata/test/" + str(kind) + ".txt"
  56. file_path_list = walk_files(path_ori + "/" + kind)
  57. lens = int(len(file_path_list) / 4 * 3)
  58. with open(path_out_train, 'w') as file:
  59. for file_path in file_path_list[:lens]:
  60. func_graph = gen_graph_from_file(file_path)
  61. for edge in func_graph:
  62. file.write(
  63. "" + str(edge[0]) + " " + str(edge[1]) + " " + str(edge[2]) + " " + str(edge[3]) + " " + str(
  64. edge[
  65. 4]) + "\n")
  66. file.write("? " + str(graph_type) + " " + file_path.replace(path_ori + "/", "") + "\n\n")
  67. with open(path_out_test, 'w') as file:
  68. for file_path in file_path_list[lens: len(file_path_list)]:
  69. func_graph = gen_graph_from_file(file_path)
  70. for edge in func_graph:
  71. file.write(
  72. "" + str(edge[0]) + " " + str(edge[1]) + " " + str(edge[2]) + " " + str(edge[3]) + " " + str(
  73. edge[
  74. 4]) + "\n")
  75. file.write("? " + str(graph_type) + " " + file_path.replace(path_ori + "/", "") + "\n\n")
# Script entry point: regenerate the train/test graph files for all categories.
if __name__ == '__main__':
    gen_graph_to_txt()
    # Debugging aid kept from development: build the graph of a single file
    # instead of running the whole export.
    # graph = gen_graph_from_file("/Users/liufan/program/PYTHON/sap2nd/GnnForPrivacyScan/data/purposeSplit/Directory/advance_touch_1.py", )
    # a = 5