# parse2nd.py
import ast
import logging

from models.funcnode import get_script
from models.sentencenode import SuspectedSentenceNode
from utils.ERRORLIST import error_list
from utils.funclink import ProjectAnalyzer
  7. def add_sentence_purpose(sentence_node_list, file_name, line_no, private_info_list_to_be_added):
  8. """
  9. Args:
  10. sentence_node_list:
  11. file_name: 所被添加purpose 的sentence_node 的 文件名
  12. line_no: 所被添加purpose 的sentence_node 的 所在行
  13. private_info_list_to_be_added: 用于添加 purpose 的(datatype,purpose)元素对
  14. Returns:
  15. """
  16. for sentence_node in sentence_node_list:
  17. if sentence_node.file_path == file_name and sentence_node.line_no == line_no:
  18. private_info_without_usage = [info for info in sentence_node.private_info if info[1] != "None"]
  19. for pair in private_info_list_to_be_added:
  20. # private_info 添加
  21. private_info_each = [(private[0], pair[1]) for private in sentence_node.private_info if
  22. private[1] == "None" and pair[1] != "None"]
  23. private_info_without_usage.extend(private_info_each)
  24. # purpose 添加
  25. if pair[1] != "None" and pair[1] not in sentence_node.purpose:
  26. sentence_node.purpose.append(pair[1])
  27. # sentence_node.purpose = [purpose for purpose in
  28. # sentence_node.purpose + [item[1] for item in private_info_list_to_be_added] if
  29. # purpose != "Usage"]
  30. if "None" in sentence_node.purpose:
  31. sentence_node.purpose.remove("None")
  32. # if len(sentence_node.purpose) != 0:
  33. # sentence_node.purpose.remove("Usage")
  34. if private_info_without_usage:
  35. sentence_node.private_info = private_info_without_usage
  36. break
  37. def get_func_list(node, func_list=None):
  38. """
  39. Args:
  40. node: ast_node
  41. func_list: 代码调用的方法列表
  42. Returns:
  43. """
  44. if func_list is None:
  45. func_list = []
  46. if isinstance(node, ast.Call):
  47. if isinstance(node.func, ast.Attribute):
  48. func_list.append(node.func.attr)
  49. elif isinstance(node.func, ast.Name):
  50. func_list.append(node.func.id)
  51. if len(node.args) > 0:
  52. for arg in node.args:
  53. func_list = get_func_list(arg, func_list)
  54. elif isinstance(node, ast.List) or isinstance(node, ast.Tuple) or isinstance(node, ast.Set):
  55. for arg in node.elts:
  56. func_list = get_func_list(arg, func_list)
  57. elif isinstance(node, ast.Compare):
  58. for comp in node.comparators:
  59. func_list = get_func_list(comp, func_list)
  60. func_list = get_func_list(node.left, func_list)
  61. elif isinstance(node, ast.withitem):
  62. func_list = get_func_list(node.context_expr)
  63. return func_list
  64. def parse_tree2nd(source_dir, p, node, lines, func_node_dict, node_list_1st, file_name, node_list=None,
  65. func_name="__main__",
  66. class_name=None):
  67. """
  68. Args:
  69. source_dir: -
  70. p: 函数调用关系的类
  71. node: -
  72. lines: -
  73. func_node_dict: 全项目函数调用图
  74. node_list_1st: 第一次便利结果
  75. file_name: -
  76. node_list: -
  77. func_name: -
  78. class_name: -
  79. Returns:
  80. """
  81. if node_list is None:
  82. node_list = []
  83. func_list = [] # node 中所有的方法
  84. if isinstance(node, ast.Expr) or isinstance(node, ast.Assign) or isinstance(node, ast.Return):
  85. func_list = get_func_list(node.value)
  86. elif isinstance(node, ast.For):
  87. func_list = get_func_list(node.iter)
  88. elif isinstance(node, ast.While) or isinstance(node, ast.If):
  89. func_list = get_func_list(node.test)
  90. elif isinstance(node, ast.With):
  91. for item in node.items:
  92. func_list = get_func_list(item, func_list)
  93. if len(func_list) > 0:
  94. func_call = [] # 该Func define node 调用的方法
  95. func_path = None
  96. if func_name is not None:
  97. func_path = file_name.replace("\\", '/').replace(
  98. source_dir.replace("\\", "/") + "/", '').replace('py',
  99. func_name).replace(
  100. '/', '.')
  101. if class_name is not None:
  102. func_path = file_name.replace("\\", '/').replace(
  103. source_dir.replace("\\", '/') + "/", '').replace('py',
  104. class_name + '.' + func_name).replace(
  105. '/', '.')
  106. # print(func_path)
  107. # func_path = source_dir.split("\\")[-1] + "." + func_path
  108. # print(func_path)
  109. try:
  110. func_call = p.find_direct_callee_func(func_path)
  111. except:
  112. pass
  113. func_list = list(set(func_list))
  114. # 隐私类型传递 func{data,usage} 中data 为空,则将func.usage赋予data,usage else extend
  115. # c(): a.b b中有隐私,赋给 c
  116. private_info = []
  117. for func in func_list:
  118. for func_c in func_call:
  119. if func == func_c.split('.')[-1]:
  120. if func_c in func_node_dict.keys():
  121. for pair in func_node_dict[func_c]: # 被调用的方法有什么隐私数据和操作 调用的该行就有什么数据和操作
  122. private_info.append(pair)
  123. add_sentence_purpose(node_list_1st, file_name, node.lineno, func_node_dict[func_c])
  124. script = get_script(node, lines)
  125. if len(private_info) > 0:
  126. sentence_node = SuspectedSentenceNode(file_name, node.lineno, private_word_list=None, purpose=None,
  127. func_name=func_name,
  128. private_info=private_info, script=script)
  129. # print(private_info)
  130. has = False
  131. for node_1st in node_list_1st:
  132. if sentence_node == node_1st:
  133. has = True
  134. break
  135. if not has:
  136. node_list.append(sentence_node)
  137. if isinstance(node, ast.ClassDef):
  138. for node_son in node.body:
  139. node_list = parse_tree2nd(source_dir, p, node_son, lines, func_node_dict, node_list_1st,
  140. file_name, node_list, func_name=node.name,
  141. class_name=node.name)
  142. elif isinstance(node, ast.FunctionDef):
  143. for node_son in node.body:
  144. node_list = parse_tree2nd(source_dir, p, node_son, lines, func_node_dict, node_list_1st,
  145. file_name, node_list, func_name=node.name,
  146. class_name=class_name)
  147. else:
  148. if hasattr(node, "body"):
  149. for node_son in node.body:
  150. node_list = parse_tree2nd(source_dir, p, node_son, lines, func_node_dict, node_list_1st,
  151. file_name, node_list, func_name=func_name,
  152. class_name=class_name)
  153. #
  154. if isinstance(node, ast.If):
  155. for node_son in node.orelse:
  156. node_list = parse_tree2nd(source_dir, p, node_son, lines, func_node_dict, node_list_1st,
  157. file_name, node_list, func_name=func_name,
  158. class_name=class_name)
  159. return node_list
  160. def parse_files_2nd(file_list, source, func_node_dict, node_list1st):
  161. """
  162. Args:
  163. file_list: -
  164. source: -
  165. func_node_dict:函数调用图
  166. node_list1st: 第一次遍历结果
  167. Returns:
  168. -
  169. """
  170. p = ProjectAnalyzer(source, file_list)
  171. node_list = []
  172. for file_name in file_list:
  173. with open(file_name, encoding='utf-8') as file_single:
  174. lines = file_single.readlines()
  175. try:
  176. tree_root = ast.parse(''.join(lines))
  177. except SyntaxError as e:
  178. e.filename = file_name
  179. error_list.append(e)
  180. pass
  181. node_list_single = parse_tree2nd(source, p, tree_root, lines, func_node_dict,
  182. node_list1st, file_name, [])
  183. node_list.extend(node_list_single)
  184. return node_list