parse.py 8.9 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228
  1. import ast
  2. import re
  3. from models.funcnode import FuncNode, get_script, match_data_type, \
  4. match_purpose_type
  5. from models.sentencenode import SuspectedSentenceNode
  6. from utils import log
  7. from utils.ERRORLIST import error_list
  8. logging = log.getlogger()
  9. def parse_tree(source, lattices, file_name, tree_node, code_lines, node_list=None, func_dict=None, class_name=None):
  10. """
  11. Args:
  12. source: 项目路径
  13. lattices: 隐私类型
  14. file_name: 文件名(单个)
  15. tree_node: ast节点
  16. code_lines: 源代码字符(readlines())
  17. node_list: sentencenode列表
  18. func_dict: 函数调用字典
  19. class_name: 方法类名
  20. Returns:
  21. """
  22. if func_dict is None:
  23. func_dict = {}
  24. if node_list is None:
  25. node_list = []
  26. data_type = lattices["dataType"]
  27. purpose_dict = lattices["purpose"]
  28. if isinstance(tree_node, ast.FunctionDef) or isinstance(tree_node, ast.AsyncFunctionDef):
  29. func_node = FuncNode(tree_node, file_name, lattices, code_lines)
  30. try:
  31. all_nodes = func_node.get_sentence_nodes()
  32. except AttributeError:
  33. raise AttributeError(file_name, tree_node.lineno)
  34. node_list.extend(all_nodes)
  35. if len(func_node.private_info) > 0:
  36. source_replace = source.replace('\\', '/')
  37. if class_name is None:
  38. func_path = func_node.file_path.replace("\\", '/').replace(
  39. source_replace + '/', '').replace('.py',
  40. '/' + func_node.func_name).replace(
  41. '/', '.')
  42. else:
  43. func_path = func_node.file_path.replace("\\", '/').replace(
  44. source_replace + '/', '').replace('.py',
  45. '/' + class_name + "/" + func_node.func_name).replace(
  46. '/', '.')
  47. func_dict[func_path] = func_node.private_info
  48. elif isinstance(tree_node, ast.ClassDef):
  49. class_name = tree_node.name
  50. for node_son in tree_node.body:
  51. if isinstance(node_son, ast.FunctionDef) or isinstance(tree_node, ast.AsyncFunctionDef):
  52. node_list, func_dict = parse_tree(source, lattices, file_name, node_son, code_lines, node_list,
  53. func_dict, class_name)
  54. # elif not isinstance(tree_node, ast.Module):
  55. # line_no = tree_node.lineno
  56. # script_ori, script = get_script(tree_node, code_lines)
  57. #
  58. # private_word_list = match_data_type(script['vars'], data_type)
  59. # private_word_list = list(set(private_word_list))
  60. #
  61. # # print(script['methods'])
  62. # purpose = match_purpose_type(script['methods'], purpose_dict)
  63. # if not (("None", "none") in private_word_list and purpose == ["None"]):
  64. # sentence_node = SuspectedSentenceNode(file_name, line_no,
  65. # private_word_list, purpose, None,
  66. # script=script_ori)
  67. # # print(private_word_list, purpose)
  68. # node_list.append(sentence_node)
  69. try:
  70. for node_son in tree_node.body:
  71. node_list, func_dict = parse_tree(source, lattices, file_name, node_son, code_lines,
  72. node_list, func_dict)
  73. except AttributeError:
  74. pass
  75. return node_list, func_dict
  76. def parse_files(file_list, source, lattices):
  77. """
  78. Args:
  79. file_list: 文件名列表
  80. source: 文件路径
  81. lattices: 隐私类型
  82. Returns:
  83. node_list:[<models.sentencenode.SuspectedSentenceNode object at 0x10e786eb0>,
  84. <models.sentencenode.SuspectedSentenceNode object at 0x10e786f10>]
  85. node_list为sentencenode对象列表,sentencenode对象可打印。
  86. [sentencenode1, sentencenode2...]
  87. func_node_dict: {'sdk_api.saltstack.SaltAPI.__init__': [('PassWord', 'Usage')],
  88. 'sdk_api.saltstack.SaltAPI.token_id': [('UserName', 'Usage'), ('PassWord', 'Usage')],
  89. 'sdk_api.saltstack.__init__': [('PassWord', 'Usage')],
  90. {func_path:[(private_info, purpose), ...]}
  91. """
  92. node_list = []
  93. func_dict = {}
  94. for file_name in file_list:
  95. with open(file_name, encoding='utf-8') as file_single:
  96. logging.error("Constructing file to ast:" + file_name)
  97. lines = file_single.readlines()
  98. file_string = re.sub(r"if[ ]*__name__[ ]*==[ ]*['\"]__main__['\"]", "def main()", ''.join(lines))
  99. try:
  100. tree_root = ast.parse(file_string)
  101. except SyntaxError as e:
  102. e.filename = file_name
  103. error_list.append(e)
  104. pass
  105. node_list_single, func_dict = parse_tree(source, lattices, file_name, tree_root, lines, func_dict=func_dict)
  106. node_list.extend(node_list_single)
  107. return node_list, func_dict
  108. def add_code(lattices, file_name, tree_node, code_lines, node_list):
  109. """
  110. Args:
  111. source: 目录
  112. lattices: datatype purpose 格
  113. file_name: 文件名
  114. tree_node:
  115. code_lines: 代码行列表
  116. node_list: sentencenode list
  117. Returns:
  118. """
  119. data_type = lattices["dataType"]
  120. purpose_dict = lattices["purpose"]
  121. if not isinstance(tree_node, ast.FunctionDef) and not isinstance(tree_node,
  122. ast.AsyncFunctionDef) and not isinstance(tree_node,
  123. ast.ClassDef) and not isinstance(
  124. tree_node, ast.Import) and not isinstance(tree_node, ast.Module):
  125. line_no = tree_node.lineno
  126. script_ori, script = get_script(tree_node, code_lines)
  127. private_word_list = match_data_type(script['vars'], data_type)
  128. private_word_list = list(set(private_word_list))
  129. # print(script['methods'])
  130. purpose = match_purpose_type(script['methods'], purpose_dict)
  131. if not (("None", "none") in private_word_list and purpose == ["None"]) and not has_node(node_list, file_name,
  132. line_no):
  133. sentence_node = SuspectedSentenceNode(file_name, line_no,
  134. private_word_list, purpose, None,
  135. script=script_ori)
  136. # print(private_word_list, purpose)
  137. node_list.append(sentence_node)
  138. try:
  139. for node_son in tree_node.body:
  140. add_code(lattices, file_name, node_son, code_lines,
  141. node_list)
  142. except AttributeError:
  143. pass
  144. def has_node(node_list, file_name, line_no):
  145. """
  146. Args:
  147. node_list: sentenceNodes list
  148. file_name: 文件名
  149. line_no: 行号
  150. Returns:
  151. node_list是否含有file_name line_no 的sentencenode
  152. """
  153. for node in node_list:
  154. if node.file_path == file_name and node.line_no == line_no:
  155. return True
  156. return False
  157. def add_code_outside_func(file_list, lattices, node_list):
  158. """
  159. Args:
  160. file_list: 文件名列表
  161. lattices: 隐私类型
  162. node_list: senetencenode 列表
  163. Returns:
  164. node_list:[<models.sentencenode.SuspectedSentenceNode object at 0x10e786eb0>,
  165. <models.sentencenode.SuspectedSentenceNode object at 0x10e786f10>]
  166. node_list为sentencenode对象列表,sentencenode对象可打印。
  167. [sentencenode1, sentencenode2...]
  168. """
  169. for file_name in file_list:
  170. with open(file_name, encoding='utf-8') as file_single:
  171. logging.error("Constructing file to ast:" + file_name)
  172. lines = file_single.readlines()
  173. file_string = re.sub(r"if[ ]*__name__[ ]*==[ ]*['\"]__main__['\"]", "def __main__()", ''.join(lines))
  174. # file_string = ''.join(lines)
  175. try:
  176. tree_root = ast.parse(file_string)
  177. except SyntaxError as e:
  178. e.filename = file_name
  179. error_list.append(e)
  180. pass
  181. add_code(lattices, file_name, tree_root, lines, node_list)
  182. return node_list
  183. if __name__ == '__main__':
  184. # print(re.sub(r"if[ ]*__name__[ ]*==[ ]*['\"]__main__['\"]", "def main()", "if __name__ == '__main__'"))
  185. # print(re.sub(r"if[ ]*__name__[ ]*==[ ]*['\"]__main__['\"]", "def main()", "if __name__==\"__main__\""))
  186. # print("if __name__==\"__main__\"".replace(r"if[ ]*__name__[ ]*==[ ]*['\"]__main__['\"]", "def main()"))
  187. with open("/Users/liufan/program/PYTHON/SAP/privacyScanLsn/test/main.py", encoding='utf-8') as file_single:
  188. lines = file_single.read()
  189. # print(lines)
  190. # ast.parse(lines)
  191. tree_root = ast.parse(lines)
  192. print(lines)