import ast
import re

from models.funcnode import FuncNode, get_script, match_data_type, \
    match_purpose_type
from models.sentencenode import SuspectedSentenceNode
from utils import log
from utils.ERRORLIST import error_list

logging = log.getlogger()
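
# This module builds the AST view used by the privacy scanner: parse_tree and
# parse_files collect SuspectedSentenceNode objects per function, while add_code
# and add_code_outside_func pick up statements that live outside any function.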


def parse_tree(source, lattices, file_name, tree_node, code_lines, node_list=None, func_dict=None, class_name=None):
    """
    Args:
        source: project root path
        lattices: privacy-type lattices
        file_name: name of the file being parsed (single file)
        tree_node: ast node
        code_lines: source code lines (from readlines())
        node_list: list of SuspectedSentenceNode objects
        func_dict: function-call dictionary
        class_name: name of the enclosing class, if any

    Returns:
        node_list, func_dict
    """
    if func_dict is None:
        func_dict = {}
    if node_list is None:
        node_list = []
    data_type = lattices["dataType"]
    purpose_dict = lattices["purpose"]
    if isinstance(tree_node, ast.FunctionDef):
        func_node = FuncNode(tree_node, file_name, lattices, code_lines)
        try:
            all_nodes = func_node.get_sentence_nodes()
        except AttributeError:
            raise AttributeError(file_name, tree_node.lineno)
        node_list.extend(all_nodes)
        if len(func_node.private_info) > 0:
            source_replace = source.replace('\\', '/')
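            # Turn the file path into a dotted "package.module[.Class].function" key,
            # e.g. (hypothetical layout) "<source>/sdk_api/saltstack.py" with class
            # SaltAPI and method __init__ becomes "sdk_api.saltstack.SaltAPI.__init__".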
            if class_name is None:
                func_path = func_node.file_path.replace("\\", '/').replace(
                    source_replace + '/', '').replace(
                    '.py', '/' + func_node.func_name).replace('/', '.')
            else:
                func_path = func_node.file_path.replace("\\", '/').replace(
                    source_replace + '/', '').replace(
                    '.py', '/' + class_name + "/" + func_node.func_name).replace('/', '.')
            func_dict[func_path] = func_node.private_info
    elif isinstance(tree_node, ast.ClassDef):
        class_name = tree_node.name
        for node_son in tree_node.body:
            if isinstance(node_son, ast.FunctionDef):
                node_list, func_dict = parse_tree(source, lattices, file_name, node_son, code_lines, node_list,
                                                  func_dict, class_name)
    # elif not isinstance(tree_node, ast.Module):
    #     line_no = tree_node.lineno
    #     script_ori, script = get_script(tree_node, code_lines)
    #
    #     private_word_list = match_data_type(script['vars'], data_type)
    #     private_word_list = list(set(private_word_list))
    #
    #     # print(script['methods'])
    #     purpose = match_purpose_type(script['methods'], purpose_dict)
    #     if not (("None", "none") in private_word_list and purpose == ["None"]):
    #         sentence_node = SuspectedSentenceNode(file_name, line_no,
    #                                               private_word_list, purpose, None,
    #                                               script=script_ori)
    #         # print(private_word_list, purpose)
    #         node_list.append(sentence_node)
    try:
        for node_son in tree_node.body:
            node_list, func_dict = parse_tree(source, lattices, file_name, node_son, code_lines,
                                              node_list, func_dict)
    except AttributeError:
        pass
    return node_list, func_dict


def parse_files(file_list, source, lattices):
    """
    Args:
        file_list: list of file names
        source: project root path
        lattices: privacy-type lattices

    Returns:
        node_list: [<models.sentencenode.SuspectedSentenceNode object at 0x10e786eb0>,
                    <models.sentencenode.SuspectedSentenceNode object at 0x10e786f10>]
            node_list is a list of printable SuspectedSentenceNode objects:
            [sentencenode1, sentencenode2, ...]
        func_node_dict: {'sdk_api.saltstack.SaltAPI.__init__': [('PassWord', 'Usage')],
                         'sdk_api.saltstack.SaltAPI.token_id': [('UserName', 'Usage'), ('PassWord', 'Usage')],
                         'sdk_api.saltstack.__init__': [('PassWord', 'Usage')], ...}
            i.e. {func_path: [(private_info, purpose), ...]}
    """
    node_list = []
    func_dict = {}
    for file_name in file_list:
        with open(file_name, encoding='utf-8') as file_single:
            logging.error("Constructing file to ast:" + file_name)
            lines = file_single.readlines()
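            # Rewrite the "if __name__ == '__main__'" guard into "def main()" (the trailing
            # ":" is kept), so the guarded top-level code parses as an ordinary function body.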
            file_string = re.sub(r"if[ ]*__name__[ ]*==[ ]*['\"]__main__['\"]", "def main()", ''.join(lines))
            try:
                tree_root = ast.parse(file_string)
            except SyntaxError as e:
                e.filename = file_name
                error_list.append(e)
                # Skip this file: tree_root would otherwise be undefined or stale.
                continue
            node_list_single, func_dict = parse_tree(source, lattices, file_name, tree_root, lines, func_dict=func_dict)
            node_list.extend(node_list_single)
    return node_list, func_dict
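

# A minimal usage sketch (hypothetical paths and lattice contents; the real lattice
# structure comes from the scanner's configuration):
#
#   lattices = {"dataType": {...}, "purpose": {...}}
#   files = ["<source>/sdk_api/saltstack.py"]
#   nodes, func_dict = parse_files(files, "<source>", lattices)
#   nodes = add_code_outside_func(files, lattices, nodes)   # second pass, defined below
#   # func_dict maps keys such as "sdk_api.saltstack.SaltAPI.__init__"
#   # to [(private_info, purpose), ...] lists.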


def add_code(lattices, file_name, tree_node, code_lines, node_list):
    """
    Args:
        lattices: dataType / purpose lattices
        file_name: file name
        tree_node: ast node
        code_lines: list of source code lines
        node_list: list of SuspectedSentenceNode objects

    Returns:
        None; matching statements are appended to node_list in place.
    """
    data_type = lattices["dataType"]
    purpose_dict = lattices["purpose"]
    if not isinstance(tree_node, (ast.FunctionDef, ast.ClassDef, ast.Import, ast.Module)):
        line_no = tree_node.lineno
        script_ori, script = get_script(tree_node, code_lines)

        private_word_list = match_data_type(script['vars'], data_type)
        private_word_list = list(set(private_word_list))

        # print(script['methods'])
        purpose = match_purpose_type(script['methods'], purpose_dict)
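        # Record the statement only if it matched a private data type or a purpose,
        # and only if no sentence node for this file/line was collected already.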
- if not (("None", "none") in private_word_list and purpose == ["None"]) and not has_node(node_list, file_name,
- line_no):
- sentence_node = SuspectedSentenceNode(file_name, line_no,
- private_word_list, purpose, None,
- script=script_ori)
- # print(private_word_list, purpose)
- node_list.append(sentence_node)
- try:
- for node_son in tree_node.body:
- add_code(lattices, file_name, node_son, code_lines,
- node_list)
- except AttributeError:
- pass


def has_node(node_list, file_name, line_no):
    """
    Args:
        node_list: list of SuspectedSentenceNode objects
        file_name: file name
        line_no: line number

    Returns:
        whether node_list already contains a sentence node for file_name and line_no
    """
    for node in node_list:
        if node.file_path == file_name and node.line_no == line_no:
            return True
    return False


def add_code_outside_func(file_list, lattices, node_list):
    """
    Args:
        file_list: list of file names
        lattices: privacy-type lattices
        node_list: list of SuspectedSentenceNode objects

    Returns:
        node_list: [<models.sentencenode.SuspectedSentenceNode object at 0x10e786eb0>,
                    <models.sentencenode.SuspectedSentenceNode object at 0x10e786f10>]
            node_list is a list of printable SuspectedSentenceNode objects:
            [sentencenode1, sentencenode2, ...]
    """
    for file_name in file_list:
        with open(file_name, encoding='utf-8') as file_single:
            logging.error("Constructing file to ast:" + file_name)
            lines = file_single.readlines()
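            # Same "__main__" guard rewrite as in parse_files (here it yields "def __main__():").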
            file_string = re.sub(r"if[ ]*__name__[ ]*==[ ]*['\"]__main__['\"]", "def __main__()", ''.join(lines))
            # file_string = ''.join(lines)
            try:
                tree_root = ast.parse(file_string)
            except SyntaxError as e:
                e.filename = file_name
                error_list.append(e)
                continue
            add_code(lattices, file_name, tree_root, lines, node_list)
    return node_list


if __name__ == '__main__':
    # print(re.sub(r"if[ ]*__name__[ ]*==[ ]*['\"]__main__['\"]", "def main()", "if __name__ == '__main__'"))
    # print(re.sub(r"if[ ]*__name__[ ]*==[ ]*['\"]__main__['\"]", "def main()", "if __name__==\"__main__\""))
    # print("if __name__==\"__main__\"".replace(r"if[ ]*__name__[ ]*==[ ]*['\"]__main__['\"]", "def main()"))
    with open("/Users/liufan/program/PYTHON/SAP/privacyScanLsn/test/main.py", encoding='utf-8') as file_single:
        lines = file_single.read()
        # print(lines)
        # ast.parse(lines)
        tree_root = ast.parse(lines)
        print(lines)