vor 4 Jahren · 0cbe02b5ef
--- a/.DS_Store
+++ b/.DS_Store
--- a/项目.md
+++ b/项目.md
@@ -0,0 +1,127 @@
 
				+Share
			
 
				+Internal Share
			
 
				+External Share
			
 
				+w/ThirdParty
			
 
				+
			
 
				+w/ServiceProvider
			
 
				+w/Consultant
			
 
				+Store
			
 
				+https://github.com/zenodo/zenodo/blob/7af3c9e57367d849f5151da68d8929cf5c0b9c7d/scripts/upload.py#L84
			
 
				+Local
			
 
				+File
			
 
				+Directory
			
 
				+https://github.com/Amzza0x00/ghostpotato/blob/master/examples/sambaPipe.py#L90
			
 
				+Messaging System
			
 
				+Kafka
			
 
				+https://github.com/dpkp/kafka-python/blob/master/example.py#L20
			
 
				+https://github.com/confluentinc/confluent-kafka-python/blob/master/examples/confluent_cloud.py#L98
			
 
				+https://github.com/confluentinc/confluent-kafka-python/blob/master/examples/producer.py#L55
			
 
				+https://github.com/owenliang/kafka/blob/master/producer.py
			
 
				+NATS
			
 
				+https://github.com/nats-io/nats.py/blob/main/examples/example.py
			
 
				+https://github.com/Gr1N/nats-python/blob/master/tests/test_client.py#L111
			
 
				+Cloud Service
			
 
				+Azure
			
 
				+https://github.com/MadTownMark/azure-storage-blob/blob/master/sdk/storage/azure-storage-file-share/samples/file_samples_hello_world.py#L75
			
 
				+S3
			
 
				+https://github.com/keithweaver/python-aws-s3/blob/master/example-upload-public.py
			
 
				+https://github.com/boto/s3transfer/blob/develop/tests/integration/test_upload.py#L53
			
 
				+https://github.com/nagwww/101-AWS-S3-Hacks/blob/master/s3sendfile.py#L18
			
 
				+https://github.com/torchbox/christmas-video-2017/blob/master/xmasvideo/s3.py#L56
			
 
				+https://github.com/pankajr141/libs3/blob/master/libs3/shutil.py#L111
			
 
				+https://github.com/bruno990/igti-edc-desafiofinal/blob/main/extract-censo/extract-censo.py#L72
			
 
				+Visualize
			
 
				+https://github.com/mwaskom/seaborn/blob/master/examples/anscombes_quartet.py#L14
			
 
				+https://github.com/matplotlib/matplotlib/blob/main/examples/userdemo/annotate_simple01.py#L12
			
 
				+https://github.com/zachwill/flask-engine/blob/master/app/views.py#L28
			
 
				+Archive
			
 
				+Anonymize
			
 
				+Hash
			
 
				+https://github.com/davidaurelio/hashids-python/blob/master/test/test_hashids.py#L44
			
 
				+https://github.com/akamhy/videohash/blob/main/tests/test_videohash.py#L17
			
 
				+https://github.com/Dstar4/Hash-Tables/blob/master/basic_hashtable/b_hashtables.py#L92
			
 
				+https://github.com/bunchesofdonald/photohash
			
 
				+Truncate
			
 
				+https://github.com/ojitha/code-challenge-1/blob/main/Third_step.py#L38
			
 
				+https://github.com/beerfleet/udemy_tutorial/blob/master/Oefeningen/uDemy/bootcamp/lots_of_exercises/truncate.py#L18
			
 
				+Pseudonym
			
 
				+https://github.com/prechelt/pseudonymizer/blob/master/pseudonymizer/tests/test_pseudonymizer.py#L10
			
 
				+https://github.wdf.sap.corp/ICN-Nanjing-Projects/Data-Anonymization/blob/master/ds4ml/command/synthesize.py#L78
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+1. Kafka s3 nats  没有
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+https://github.com/zenodo/zenodo
			
 
				+
			
 
				+https://github.com/Amzza0x00/ghostpotato
			
 
				+
			
 
				+ 
			
 
				+
			
 
				+https://github.com/dpkp/kafka-python
			
 
				+
			
 
				+https://github.com/confluentinc/confluent-kafka-python/blob/master/examples/
			
 
				+
			
 
				+https://github.com/owenliang/kafka
			
 
				+
			
 
				+ 
			
 
				+
			
 
				+https://github.com/nats-io/nats.py/blob/main/examples
			
 
				+
			
 
				+https://github.com/Gr1N/nats-python/blob/master/tests/
			
 
				+
			
 
				+ 
			
 
				+
			
 
				+https://github.com/MadTownMark/azure-storage-blob/blob/master/sdk/storage/azure-storage-file-share/samples/
			
 
				+
			
 
				+ 
			
 
				+
			
 
				+https://github.com/keithweaver/python-aws-s3/blob/master/example-upload-public.py
			
 
				+
			
 
				+https://github.com/boto/s3transfer/blob/develop/tests/integration/test_upload.py
			
 
				+
			
 
				+https://github.com/nagwww/101-AWS-S3-Hacks/blob/master/s3sendfile.py
			
 
				+
			
 
				+https://github.com/torchbox/christmas-video-2017/blob/master/xmasvideo/s3.py
			
 
				+
			
 
				+https://github.com/pankajr141/libs3/blob/master/libs3/shutil.py
			
 
				+
			
 
				+https://github.com/bruno990/igti-edc-desafiofinal/blob/main/extract-censo/extract-censo.py
			
 
				+
			
 
				+ 
			
 
				+
			
 
				+https://github.com/mwaskom/seaborn/blob/master/examples/anscombes_quartet.py
			
 
				+
			
 
				+https://github.com/matplotlib/matplotlib/blob/main/examples/userdemo/annotate_simple01.py
			
 
				+
			
 
				+https://github.com/zachwill/flask-engine/blob/master/app/views.py
			
 
				+
			
 
				+ 
			
 
				+
			
 
				+https://github.com/davidaurelio/hashids-python/blob/master/test/test_hashids.py
			
 
				+
			
 
				+https://github.com/akamhy/videohash/blob/main/tests/test_videohash.py
			
 
				+
			
 
				+https://github.com/Dstar4/Hash-Tables/blob/master/basic_hashtable/b_hashtables.py
			
 
				+
			
 
				+https://github.com/bunchesofdonald/photohash
			
 
				+
			
 
				+ 
			
 
				+
			
 
				+https://github.com/ojitha/code-challenge-1/blob/main/Third_step.py
			
 
				+
			
 
				+https://github.com/beerfleet/udemy_tutorial/blob/master/Oefeningen/uDemy/bootcamp/lots_of_exercises/truncate.py
			
 
				+
			
 
				+ 
			
 
				+
			
 
				+https://github.com/prechelt/pseudonymizer/blob/master/pseudonymizer/tests/test_pseudonymizer.py
			
 
				+
			
 
				+https://github.wdf.sap.corp/ICN-Nanjing-Projects/Data-Anonymization/blob/master/ds4ml/command/synthesize.py
			
 
				+
			
 
				+ 
			
--- a/Templete.json
+++ b/Templete.json
@@ -0,0 +1,20 @@
 
				+[
			
 
				+    {
			
 
				+      "Location":"cmdb/views/controller/ambientController.py 159",
			
 
				+      "DataType": "UserName",
			
 
				+      "Purpose": "Usage",
			
 
				+      "Note":""
			
 
				+    },
			
 
				+    {
			
 
				+      "Location":"cmdb/views/controller/ambientController.py 159",
			
 
				+      "DataType": "EmailAddress",
			
 
				+      "Purpose": "Usage",
			
 
				+      "Note":""
			
 
				+    },
			
 
				+    {
			
 
				+      "Location":"cmdb/views/controller/ambientController.py 159",
			
 
				+      "DataType": "PhoneNumber",
			
 
				+      "Purpose": "Usage",
			
 
				+      "Note":""
			
 
				+    }
			
 
				+]
			
--- a/accuracy/__init__.py
+++ b/accuracy/__init__.py
--- a/accuracy/__pycache__/__init__.cpython-39.pyc
+++ b/accuracy/__pycache__/__init__.cpython-39.pyc
--- a/accuracy/__pycache__/accuracytest.cpython-39.pyc
+++ b/accuracy/__pycache__/accuracytest.cpython-39.pyc
--- a/accuracy/accuracytest.py
+++ b/accuracy/accuracytest.py
@@ -0,0 +1,90 @@
 
				+import xlrd
			
 
				+
			
 
				+from utils.fileio import load_location, load_data_purpose_split, list_to_excel
			
 
				+
			
 
				+
			
 
				+def test_recall_accuracy(suspected_node_list, source):
			
 
				+    """
			
 
				+    查全率=（检索出的相关信息量/系统中的相关信息总量）
			
 
				+    :param suspected_node_list:
			
 
				+    :param source:
			
 
				+    :return:
			
 
				+    """
			
 
				+    location_dict = load_location("项目校对表-旧.xlsx")
			
 
				+    location_num = len(location_dict.keys())
			
 
				+    recall_location = 0
			
 
				+    recall_accurate = 0
			
 
				+    print("准确的结果如下：")
			
 
				+    for node in suspected_node_list:
			
 
				+        if node.private_info is None:
			
 
				+            node.private_info = [(key_word[0], node.purpose) for key_word in node.private_word_list]
			
 
				+        print((node.file_path.replace(source + '\\', ''), node.line_no, node.private_info,node.purpose))
			
 
				+        if (node.file_path.replace(source + '\\', '').replace("\\", '/'), node.line_no) in location_dict.keys():
			
 
				+            recall_location += 1
			
 
				+            # print(node)
			
 
				+            # print(location_dict[(node.file_path.replace(source + '/', ''), node.line_no)])
			
 
				+            # print()
			
 
				+
			
 
				+            if node.private_info == location_dict[
			
 
				+                (node.file_path.replace(source + '\\', '').replace("\\", '/'), node.line_no)]:
			
 
				+                recall_accurate += 1
			
 
				+    if recall_location>0:
			
 
				+        print("查全率为： ", recall_accurate, "/", recall_location, '/', location_num, '/', recall_location / location_num)
			
 
				+        print("查准率为： ", recall_accurate, "/", recall_location, '/', len(suspected_node_list), '/',
			
 
				+              recall_location / len(suspected_node_list))
			
 
				+    return {"recall_accurate": recall_accurate, "recall_location": recall_location, "location_num": location_num}
			
 
				+
			
 
				+
			
 
				+def test_missed(suspected_node_list, source):
			
 
				+    location_dict = load_location("项目校对表-旧.xlsx")
			
 
				+    paths = [(node.file_path.replace(source + "\\", '').replace("\\", "/"), node.line_no) for node in
			
 
				+             suspected_node_list]
			
 
				+    res = []
			
 
				+    for node in location_dict.keys():
			
 
				+        if node not in paths:
			
 
				+            # print("未命中：" + node[0] + str(node[1]))
			
 
				+            res.append("未命中：" + node[0] + str(node[1]))
			
 
				+        else:
			
 
				+            # print("命中：" + node[0] + str(node[1]))
			
 
				+            res.append("命中：" + node[0] + str(node[1]))
			
 
				+    paths = [paths[0],paths[1],]
			
 
				+    return {"suspected_node_list": paths, "missed": res}
			
 
				+
			
 
				+
			
 
				+def test_stamp(stamp):
			
 
				+    # print(stamp)
			
 
				+    data_type_compute = []
			
 
				+    purpose_compute = []
			
 
				+    for st in stamp:
			
 
				+        loc = st[0] + ' ' + str(st[1])
			
 
				+        datatype = list(set([data[0] for data in st[2] if data[0] != 'Data']))
			
 
				+        purpose = list(set([data[1] for data in st[2] if data[1] != 'Usage']))
			
 
				+        for dt in datatype:
			
 
				+            data_type_compute.append((loc, dt))
			
 
				+        for pur in purpose:
			
 
				+            purpose_compute.append((loc, pur))
			
 
				+
			
 
				+    data_type_list, purpose_list = load_data_purpose_split("/Users/liufan/program/PYTHON/SAP/privacyScanLsn/项目校对表-旧.xlsx")
			
 
				+
			
 
				+    for data in data_type_list:
			
 
				+        if data not in data_type_compute:
			
 
				+            print(data)
			
 
				+
			
 
				+    for pur in purpose_list:
			
 
				+        if pur not in purpose_compute:
			
 
				+            print(pur)
			
 
				+
			
 
				+    data_type_all = list(set(data_type_compute + data_type_list))
			
 
				+    purpose_all = list(set(purpose_list + purpose_compute))
			
 
				+
			
 
				+    print("data_type准确率为： ", len(data_type_compute), "/", len(data_type_all),
			
 
				+          len(data_type_compute) / len(data_type_all))
			
 
				+    print("purpose准确率为： ", len(purpose_compute), "/", len(purpose_all),
			
 
				+          len(purpose_compute) / len(purpose_all))
			
 
				+
			
 
				+    list_to_excel(r'analyze/output/cmdb-python-master-标准.xls', data_type_all, purpose_all)
			
 
				+
			
 
				+
			
 
				+if __name__ == '__main__':
			
 
				+    # load_location("项目校对表.xlsx")
			
 
				+    test_stamp()
			
--- a/algorithm/__init__.py
+++ b/algorithm/__init__.py
--- a/algorithm/__pycache__/__init__.cpython-39.pyc
+++ b/algorithm/__pycache__/__init__.cpython-39.pyc
--- a/algorithm/__pycache__/charactermatch.cpython-39.pyc
+++ b/algorithm/__pycache__/charactermatch.cpython-39.pyc
--- a/algorithm/charactermatch.py
+++ b/algorithm/charactermatch.py
@@ -0,0 +1,91 @@
 
				+import copy
			
 
				+import difflib
			
 
				+# import Levenshtein
			
 
				+
			
 
				+
			
 
				+# duplicated
			
 
				+def character_match_abbr(word_std, abbr, word):
			
 
				+    if word.find(word_std) != -1:
			
 
				+        return True
			
 
				+    while word.find(abbr[0]) != -1 and word.find(abbr[0]) + 3 <= len(word):
			
 
				+        word = word[word.find(abbr[0]):]
			
 
				+        copy_abbr = copy.deepcopy(abbr)
			
 
				+        flag = True
			
 
				+        for i in range(3):
			
 
				+            index = copy_abbr.find(word[0])
			
 
				+            if index == -1:
			
 
				+                flag = False
			
 
				+                break
			
 
				+            else:
			
 
				+                copy_abbr = copy_abbr[index:]
			
 
				+                word = word[1:]
			
 
				+        if flag:
			
 
				+            return True
			
 
				+        else:
			
 
				+            continue
			
 
				+    return False
			
 
				+
			
 
				+
			
 
				+def character_match(word_std, word):
			
 
				+    """
			
 
				+    模糊匹配
			
 
				+    Args:
			
 
				+        word_std:
			
 
				+        word:
			
 
				+
			
 
				+    Returns:
			
 
				+    script_path
			
 
				+    """
			
 
				+    word, word_std = word.lower().replace("_", ""), word_std.lower()
			
 
				+    if word.find(word_std) != -1 or difflib.SequenceMatcher((lambda x: x in ["_", "/"]), word, word_std).ratio() > 0.9:
			
 
				+        return True
			
 
				+    else:
			
 
				+        return False
			
 
				+
			
 
				+
			
 
				+def word_match(word_std_list, word):
			
 
				+    """
			
 
				+
			
 
				+    Args:
			
 
				+        word_std_list: 可能的缩写类型
			
 
				+        word: 查询的单词
			
 
				+
			
 
				+    Returns:
			
 
				+        True/False
			
 
				+
			
 
				+    """
			
 
				+    if "ip" in word_std_list:
			
 
				+        word_std_list.remove("ip")
			
 
				+        if word == "ip" or word == 'IP' or word == 'Ip':
			
 
				+            return True
			
 
				+    for word_std in word_std_list:
			
 
				+        if character_match(word_std, word):
			
 
				+            return True
			
 
				+        else:
			
 
				+            continue
			
 
				+    return False
			
 
				+
			
 
				+
			
 
				+def test_match(a, b):
			
 
				+    print(b.find(a) != -1)
			
 
				+    print(difflib.SequenceMatcher((lambda x: x in ["_", "/"]), a, b).ratio())
			
 
				+
			
 
				+    print()
			
 
				+
			
 
				+
			
 
				+if __name__ == '__main__':
			
 
				+    # print(word_match(["password", "pwd", "psw", "pswd"], "psd"))
			
 
				+    # print(word_match(["password", "pwd", "psw", "pswd"], "userpwd"))
			
 
				+    # print(word_match(["password", "pwd", "psw", "pswd"], "user_psw_1"))
			
 
				+    # print(word_match(["password", "pwd", "psw", "pswd"], "pwa"))
			
 
				+    # print(word_match(["password", "pwd", "psw", "pswd"], "passw"))
			
 
				+    # print(word_match(["password", "pwd", "psw", "pswd"], "passpsw"))
			
 
				+    # print(word_match(["password", "pwd", "psw", "pswd"], "user_password_a"))
			
 
				+    # print(word_match(["password", "pwd", "psw", "pswd"], "psw_a"))
			
 
				+    word_match(["pswd", "psw", "pwd", "password", "pass_word", "gitpass"], "gen_password")
			
 
				+    word_match(["key"], "gitkey")
			
 
				+    print(word_match(["Pseudonym", "alias"], "pseudonyms"))
			
 
				+    # word_match(["ipaddr", "IPAddress", "ip"], "output_dir")
			
 
				+    # word_match(["ipaddr", "IPAddress", "ip"], "os.path.pardir")
			
 
				+
			
 
				+# 包含+长度限制
			
--- a/algorithm/nlp.py
+++ b/algorithm/nlp.py
@@ -0,0 +1,50 @@
 
				+import torch
			
 
				+from transformers import BertModel, BertTokenizer
			
 
				+import numpy as np
			
 
				+import time
			
 
				+
			
 
				+
			
 
				+def get_word_vec(word):
			
 
				+    # 这里我们调用bert-base模型，同时模型的词典经过小写处理
			
 
				+    model_name = 'bert-base-uncased'
			
 
				+    # 读取模型对应的tokenizer
			
 
				+    tokenizer = BertTokenizer.from_pretrained(model_name)
			
 
				+    # 载入模型
			
 
				+    model = BertModel.from_pretrained(model_name)
			
 
				+    # 输入文本
			
 
				+    input_text = word
			
 
				+    # 通过tokenizer把文本变成 token_id
			
 
				+    input_ids = torch.tensor([tokenizer.encode(input_text_i) for input_text_i in input_text])
			
 
				+    max_len = 10
			
 
				+    # while len(input_ids)<max_len:
			
 
				+    #     input_ids.
			
 
				+    print(input_ids)
			
 
				+    # input_ids: [101, 2182, 2003, 2070, 3793, 2000, 4372, 16044, 102]
			
 
				+    # input_ids = torch.tensor([input_ids])
			
 
				+    # 获得BERT模型最后一个隐层结果
			
 
				+    with torch.no_grad():
			
 
				+        last_hidden_states = model(input_ids)[0]  # Models outputs are now tuples
			
 
				+        # print(model(input_ids))
			
 
				+    print(last_hidden_states)
			
 
				+    print(last_hidden_states.shape)
			
 
				+    """ tensor([[[-0.0549,  0.1053, -0.1065,  ..., -0.3550,  0.0686,  0.6506],
			
 
				+             [-0.5759, -0.3650, -0.1383,  ..., -0.6782,  0.2092, -0.1639],
			
 
				+             [-0.1641, -0.5597,  0.0150,  ..., -0.1603, -0.1346,  0.6216],
			
 
				+             ...,
			
 
				+             [ 0.2448,  0.1254,  0.1587,  ..., -0.2749, -0.1163,  0.8809],
			
 
				+             [ 0.0481,  0.4950, -0.2827,  ..., -0.6097, -0.1212,  0.2527],
			
 
				+             [ 0.9046,  0.2137, -0.5897,  ...,  0.3040, -0.6172, -0.1950]]]) 
			
 
				+        shape: (1, 9, 768)     
			
 
				+    """
			
 
				+    return last_hidden_states
			
 
				+
			
 
				+
			
 
				+def get_cos_similar(v1: list, v2: list):
			
 
				+    num = float(np.dot(v1, v2))  # 向量点乘
			
 
				+    denom = np.linalg.norm(v1) * np.linalg.norm(v2)  # 求模长的乘积
			
 
				+    return 0.5 + 0.5 * (num / denom) if denom != 0 else 0
			
 
				+
			
 
				+
			
 
				+if __name__ == '__main__':
			
 
				+    vecs = get_word_vec([["psw", "name"], ["git", "name"]])
			
 
				+    print(get_cos_similar(vecs[0][0], vecs[1][0]))
			
--- a/analyze/.DS_Store
+++ b/analyze/.DS_Store
--- a/analyze/__init__.py
+++ b/analyze/__init__.py
--- a/analyze/__pycache__/__init__.cpython-39.pyc
+++ b/analyze/__pycache__/__init__.cpython-39.pyc
--- a/analyze/__pycache__/outanalyze.cpython-39.pyc
+++ b/analyze/__pycache__/outanalyze.cpython-39.pyc
--- a/analyze/combine.xlsx
+++ b/analyze/combine.xlsx
--- a/analyze/hand2.xlsx
+++ b/analyze/hand2.xlsx
--- a/analyze/outanalyze.py
+++ b/analyze/outanalyze.py
@@ -0,0 +1,40 @@
 
				+import xlwt
			
 
				+
			
 
				+from utils.fileio import load_location, write_json
			
 
				+
			
 
				+
			
 
				+def out_analyze(node_list, source, save_file: str, entire=False):
			
 
				+    book = xlwt.Workbook(encoding='utf-8')
			
 
				+    sheet = book.add_sheet("DataType")
			
 
				+
			
 
				+    cols = ["Location", "Function", "DataType", "Purpose"]
			
 
				+    if entire:
			
 
				+        cols.remove("Function")
			
 
				+
			
 
				+    for i in range(len(cols)):
			
 
				+        sheet.write(0, i, cols[i])
			
 
				+
			
 
				+    tmp_row = 1
			
 
				+    for i in range(len(node_list)):
			
 
				+        node = node_list[i]
			
 
				+        file_path = node.file_path.replace('\\', '/').replace(source.replace('\\', '/') + '/', '').split('/')[-1]
			
 
				+        location = file_path + "#L" + str(node.line_no)
			
 
				+
			
 
				+        for data_type, purpose in node.private_info:
			
 
				+            if not data_type:
			
 
				+                data_type = "None"
			
 
				+            if not purpose:
			
 
				+                purpose = "None"
			
 
				+            if not node.func_name:
			
 
				+                node.func_name = "None"
			
 
				+            sheet.write(tmp_row, 0, location)
			
 
				+            if not entire:
			
 
				+                sheet.write(tmp_row, 1, node.func_name)
			
 
				+                sheet.write(tmp_row, 2, data_type)
			
 
				+                sheet.write(tmp_row, 3, purpose)
			
 
				+            else:
			
 
				+                sheet.write(tmp_row, 1, data_type)
			
 
				+                sheet.write(tmp_row, 2, purpose)
			
 
				+            tmp_row += 1
			
 
				+
			
 
				+    book.save(save_file)
			
--- a/analyze/output/0-cmdb.xls
+++ b/analyze/output/0-cmdb.xls
--- a/analyze/output/1-tmp.xls
+++ b/analyze/output/1-tmp.xls
--- a/analyze/output/1-upload.xls
+++ b/analyze/output/1-upload.xls
--- a/analyze/output/10-example-upload-public.xls
+++ b/analyze/output/10-example-upload-public.xls
--- a/analyze/output/11-test_upload.xls
+++ b/analyze/output/11-test_upload.xls
--- a/analyze/output/12-s3sendfile.xls
+++ b/analyze/output/12-s3sendfile.xls
--- a/analyze/output/13-s3.xls
+++ b/analyze/output/13-s3.xls
--- a/analyze/output/14-shutil.xls
+++ b/analyze/output/14-shutil.xls
--- a/analyze/output/15-extract-censo.xls
+++ b/analyze/output/15-extract-censo.xls
--- a/analyze/output/16-anscombes_quartet.xls
+++ b/analyze/output/16-anscombes_quartet.xls
--- a/analyze/output/17-annotate_simple01.xls
+++ b/analyze/output/17-annotate_simple01.xls
--- a/analyze/output/18-views.xls
+++ b/analyze/output/18-views.xls
--- a/analyze/output/19-test_hashids.xls
+++ b/analyze/output/19-test_hashids.xls
--- a/analyze/output/2-sambaPipe.xls
+++ b/analyze/output/2-sambaPipe.xls
--- a/analyze/output/2.xls
+++ b/analyze/output/2.xls
--- a/analyze/output/20-test_videohash.xls
+++ b/analyze/output/20-test_videohash.xls
--- a/analyze/output/21-b_hashtables.xls
+++ b/analyze/output/21-b_hashtables.xls
--- a/analyze/output/22-photohash-master.xls
+++ b/analyze/output/22-photohash-master.xls
--- a/analyze/output/23-Third_step.xls
+++ b/analyze/output/23-Third_step.xls
--- a/analyze/output/24-truncate.xls
+++ b/analyze/output/24-truncate.xls
--- a/analyze/output/25-test_pseudonymizer.xls
+++ b/analyze/output/25-test_pseudonymizer.xls
--- a/analyze/output/26-pyworkshop.xls
+++ b/analyze/output/26-pyworkshop.xls
--- a/analyze/output/28-miniprojects.xls
+++ b/analyze/output/28-miniprojects.xls
--- a/analyze/output/29-spark-structured-streaming-window-udf-example.xls
+++ b/analyze/output/29-spark-structured-streaming-window-udf-example.xls
--- a/analyze/output/3-example.xls
+++ b/analyze/output/3-example.xls
--- a/analyze/output/30-data-synthesis-for-machine-learning.xls
+++ b/analyze/output/30-data-synthesis-for-machine-learning.xls
--- a/analyze/output/31-hana-my-thai-star-data-generator.xls
+++ b/analyze/output/31-hana-my-thai-star-data-generator.xls
--- a/analyze/output/32-sambaPipe.xls
+++ b/analyze/output/32-sambaPipe.xls
--- a/analyze/output/33-cmscontrib.xls
+++ b/analyze/output/33-cmscontrib.xls
--- a/analyze/output/4-confluent_cloud.xls
+++ b/analyze/output/4-confluent_cloud.xls
--- a/analyze/output/5-producer.xls
+++ b/analyze/output/5-producer.xls
--- a/analyze/output/6-producer.xls
+++ b/analyze/output/6-producer.xls
--- a/analyze/output/7-example.xls
+++ b/analyze/output/7-example.xls
--- a/analyze/output/8-test_client.xls
+++ b/analyze/output/8-test_client.xls
--- a/analyze/output/9-file_samples_hello_world.xls
+++ b/analyze/output/9-file_samples_hello_world.xls
--- a/analyze/output/Convert_JSON_to_CSV.xls
+++ b/analyze/output/Convert_JSON_to_CSV.xls
--- a/analyze/output/b_hashtables.xls
+++ b/analyze/output/b_hashtables.xls
--- a/analyze/output/cmdb-python-master-手工-2.xls
+++ b/analyze/output/cmdb-python-master-手工-2.xls
--- a/analyze/output/cmdb-python-master-手工.xls
+++ b/analyze/output/cmdb-python-master-手工.xls
--- a/analyze/output/cmdb-python-master-标准.xls
+++ b/analyze/output/cmdb-python-master-标准.xls
--- a/analyze/output/cmdb-python-master.xls
+++ b/analyze/output/cmdb-python-master.xls
--- a/analyze/output/cmscontrib.xls
+++ b/analyze/output/cmscontrib.xls
--- a/analyze/output/data-synthesis-for-machine-learning.xls
+++ b/analyze/output/data-synthesis-for-machine-learning.xls
--- a/analyze/output/file_samples_hello_world.xls
+++ b/analyze/output/file_samples_hello_world.xls
--- a/analyze/output/ghostpotato-master-X.xls
+++ b/analyze/output/ghostpotato-master-X.xls
--- a/analyze/output/hana-my-thai-star-data-generator.xls
+++ b/analyze/output/hana-my-thai-star-data-generator.xls
--- a/analyze/output/medical_data_visualizer.xls
+++ b/analyze/output/medical_data_visualizer.xls
--- a/analyze/output/nnja-python.xls
+++ b/analyze/output/nnja-python.xls
--- a/analyze/output/pseudonymizers.xls
+++ b/analyze/output/pseudonymizers.xls
--- a/analyze/output/python-mini-projects-master.xls
+++ b/analyze/output/python-mini-projects-master.xls
--- a/analyze/output/python-record-my-voice.xls
+++ b/analyze/output/python-record-my-voice.xls
--- a/analyze/output/pyworkshop.xls
+++ b/analyze/output/pyworkshop.xls
--- a/analyze/output/record-my-voice.xls
+++ b/analyze/output/record-my-voice.xls
--- a/analyze/output/roytuts-python.xls
+++ b/analyze/output/roytuts-python.xls
--- a/analyze/output/sambaPipe.xls
+++ b/analyze/output/sambaPipe.xls
--- a/analyze/output/save_historical_data.xls
+++ b/analyze/output/save_historical_data.xls
--- a/analyze/output/spark-structured-streaming-window-udf-example.xls
+++ b/analyze/output/spark-structured-streaming-window-udf-example.xls
--- a/analyze/output/test.xls
+++ b/analyze/output/test.xls
--- a/analyze/output1-libs3-master.xlsx
+++ b/analyze/output1-libs3-master.xlsx
--- a/analyze/output2/Convert_JSON_to_CSV.xls
+++ b/analyze/output2/Convert_JSON_to_CSV.xls
--- a/analyze/output2/Instagram_profile.xls
+++ b/analyze/output2/Instagram_profile.xls
--- a/analyze/output2/Random_password_generator.xls
+++ b/analyze/output2/Random_password_generator.xls
--- a/analyze/output2/chapter2.xls
+++ b/analyze/output2/chapter2.xls
--- a/analyze/output2/chapter4.xls
+++ b/analyze/output2/chapter4.xls
--- a/analyze/output2/chapter7.xls
+++ b/analyze/output2/chapter7.xls
--- a/analyze/output2/ds4ml.xls
+++ b/analyze/output2/ds4ml.xls
--- a/analyze/output2/fortest.xls
+++ b/analyze/output2/fortest.xls
--- a/analyze/output2/fortesthana.xls
+++ b/analyze/output2/fortesthana.xls
--- a/analyze/output2/mini.xls
+++ b/analyze/output2/mini.xls
--- a/analyze/output2/python-record-my-voice.xls
+++ b/analyze/output2/python-record-my-voice.xls
--- a/analyze/output2/roytuts-python.xls
+++ b/analyze/output2/roytuts-python.xls
--- a/analyze/output2/spark-structured-streaming-window-udf-example.xls
+++ b/analyze/output2/spark-structured-streaming-window-udf-example.xls
--- a/analyze/output2/src.xls
+++ b/analyze/output2/src.xls
--- a/analyze/output2/test.xls
+++ b/analyze/output2/test.xls
--- a/analyze/output2/validation.xlsx
+++ b/analyze/output2/validation.xlsx
--- a/analyze/progarm.xlsx
+++ b/analyze/progarm.xlsx
--- a/analyze/program2.xlsx
+++ b/analyze/program2.xlsx
--- a/analyze/~$combine.xlsx
+++ b/analyze/~$combine.xlsx
--- a/flaskBack.py
+++ b/flaskBack.py
@@ -0,0 +1,38 @@
 
				+from flask import Flask, request
			
 
				+from interface import annotate
			
 
				+from flask_cors import CORS
			
 
				+from flask import jsonify
			
 
				+from utils.fileio import load_json
			
 
				+
			
 
				+app = Flask("PrivacyScan")
			
 
				+
			
 
				+cors = CORS(app, resources={r"/scan": {"origins": "*"}})
			
 
				+
			
 
				+
			
 
				+@app.route("/scan", methods=['POST'])
			
 
				+def scan():
			
 
				+    source = request.get_json()['source']
			
 
				+    print(request.get_json())
			
 
				+    data_type = load_json('lattices/datatype_dictionary.json')
			
 
				+    purpose_dict = load_json('lattices/purpose_dictionary.json')
			
 
				+    lattice = {'dataType': data_type, 'purpose': purpose_dict}
			
 
				+
			
 
				+    result = annotate(source, lattice,
			
 
				+             False)
			
 
				+    # result = {
			
 
				+    #     'accuracy': {
			
 
				+    #         'recall_accurate': 10,
			
 
				+    #         'recall_location': 128,
			
 
				+    #         'location_num': 158
			
 
				+    #     },
			
 
				+    #     'missed': {
			
 
				+    #         'suspected_node_list': ["第一个文件 第一行", "第一个文件 第二行"],
			
 
				+    #         'missed': ["命中：第一个文件 第一行", "未命中：第二个文件第三行"]
			
 
				+    #     }
			
 
				+    # }
			
 
				+    print(source)
			
 
				+    return jsonify(result)
			
 
				+
			
 
				+
			
 
				+if __name__ == '__main__':
			
 
				+    app.run()
			
--- a/graphgen.py
+++ b/graphgen.py
@@ -0,0 +1,16 @@
 
				+import graphviz
			
 
				+import pyan
			
 
				+
			
 
				+from utils import fileio
			
 
				+
			
 
				+# root_dir1 = "/Users/liufan/program/PYTHON/SAP/PrivacyScan/systementrance.py"
			
 
				+# root_dir2 = "/Users/liufan/program/PYTHON/SAP/PrivacyScan/utils/fileio.py"
			
 
				+# root_list = [root_dir2, root_dir1]
			
 
				+
			
 
				+source_dir = "/Users/liufan/program/PYTHON/SAP/PrivacyScan"
			
 
				+file_list = fileio.walk_files_path(source_dir)
			
 
				+
			
 
				+res = pyan.create_callgraph(file_list, format="dot")
			
 
				+
			
 
				+graph = graphviz.Source(res)
			
 
				+graph.view()
			
--- a/history/program-azure-storage-blob.json
+++ b/history/program-azure-storage-blob.json
@@ -0,0 +1,100 @@
 
				+[
			
 
				+  {
			
 
				+    "Location": "D:\\Download\\azure-storage-blob-master\\sdk\\storage\\azure-storage-file-share\\samples\\file_samples_authentication.py 47",
			
 
				+    "DataType": "key",
			
 
				+    "Purpose": "Share/ExternalShare/ServiceProvider",
			
 
				+    "confidence": 1,
			
 
				+    "Script": ""
			
 
				+  },
			
 
				+  {
			
 
				+    "Location": "D:\\Download\\azure-storage-blob-master\\sdk\\storage\\azure-storage-file-share\\samples\\file_samples_authentication.py 51",
			
 
				+    "DataType": "key",
			
 
				+    "Purpose": "Share/ExternalShare/ServiceProvider",
			
 
				+    "confidence": 1,
			
 
				+    "Script": ""
			
 
				+  },
			
 
				+  {
			
 
				+    "Location": "D:\\Download\\azure-storage-blob-master\\sdk\\storage\\azure-storage-file-share\\samples\\file_samples_authentication.py 66",
			
 
				+    "DataType": "UserName",
			
 
				+    "Purpose": "Usage",
			
 
				+    "confidence": 1,
			
 
				+    "Script": ""
			
 
				+  },
			
 
				+  {
			
 
				+    "Location": "D:\\Download\\azure-storage-blob-master\\sdk\\storage\\azure-storage-file-share\\samples\\file_samples_authentication.py 66",
			
 
				+    "DataType": "key",
			
 
				+    "Purpose": "Usage",
			
 
				+    "confidence": 1,
			
 
				+    "Script": ""
			
 
				+  },
			
 
				+  {
			
 
				+    "Location": "D:\\Download\\azure-storage-blob-master\\sdk\\storage\\azure-storage-file-share\\samples\\file_samples_client.py 102",
			
 
				+    "DataType": "UserName",
			
 
				+    "Purpose": "Store/Local/File",
			
 
				+    "confidence": 1,
			
 
				+    "Script": ""
			
 
				+  },
			
 
				+  {
			
 
				+    "Location": "D:\\Download\\azure-storage-blob-master\\sdk\\storage\\azure-storage-file-share\\samples\\file_samples_client.py 94",
			
 
				+    "DataType": "UserName",
			
 
				+    "Purpose": "Store/Local/File",
			
 
				+    "confidence": 1,
			
 
				+    "Script": ""
			
 
				+  },
			
 
				+  {
			
 
				+    "Location": "D:\\Download\\azure-storage-blob-master\\sdk\\storage\\azure-storage-file-share\\samples\\file_samples_hello_world.py 41",
			
 
				+    "DataType": "UserName",
			
 
				+    "Purpose": "Share/ExternalShare/ServiceProvider",
			
 
				+    "confidence": 1,
			
 
				+    "Script": ""
			
 
				+  },
			
 
				+  {
			
 
				+    "Location": "D:\\Download\\azure-storage-blob-master\\sdk\\storage\\azure-storage-file-share\\samples\\file_samples_hello_world.py 44",
			
 
				+    "DataType": "UserName",
			
 
				+    "Purpose": "Share/ExternalShare/ServiceProvider",
			
 
				+    "confidence": 1,
			
 
				+    "Script": ""
			
 
				+  },
			
 
				+  {
			
 
				+    "Location": "D:\\Download\\azure-storage-blob-master\\sdk\\storage\\azure-storage-file-share\\samples\\file_samples_hello_world.py 48",
			
 
				+    "DataType": "UserName",
			
 
				+    "Purpose": "Share/ExternalShare/ServiceProvider",
			
 
				+    "confidence": 1,
			
 
				+    "Script": ""
			
 
				+  },
			
 
				+  {
			
 
				+    "Location": "D:\\Download\\azure-storage-blob-master\\sdk\\storage\\azure-storage-file-share\\samples\\file_samples_hello_world.py 58",
			
 
				+    "DataType": "UserName",
			
 
				+    "Purpose": "Share/ExternalShare/ServiceProvider",
			
 
				+    "confidence": 1,
			
 
				+    "Script": ""
			
 
				+  },
			
 
				+  {
			
 
				+    "Location": "D:\\Download\\azure-storage-blob-master\\sdk\\storage\\azure-storage-file-share\\samples\\file_samples_hello_world.py 61",
			
 
				+    "DataType": "UserName",
			
 
				+    "Purpose": "Share/ExternalShare/ServiceProvider",
			
 
				+    "confidence": 1,
			
 
				+    "Script": ""
			
 
				+  },
			
 
				+  {
			
 
				+    "Location": "D:\\Download\\azure-storage-blob-master\\sdk\\storage\\azure-storage-file-share\\samples\\file_samples_hello_world.py 67",
			
 
				+    "DataType": "UserName",
			
 
				+    "Purpose": "Store/Local/File",
			
 
				+    "confidence": 1,
			
 
				+    "Script": ""
			
 
				+  },
			
 
				+  {
			
 
				+    "Location": "D:\\Download\\azure-storage-blob-master\\sdk\\storage\\azure-storage-file-share\\samples\\file_samples_hello_world.py 75",
			
 
				+    "DataType": "UserName",
			
 
				+    "Purpose": "Store/Local/File",
			
 
				+    "confidence": 1,
			
 
				+    "Script": ""
			
 
				+  },
			
 
				+  {
			
 
				+    "Location": "D:\\Download\\azure-storage-blob-master\\sdk\\storage\\azure-storage-file-share\\samples\\file_samples_service.py 78",
			
 
				+    "DataType": "UserName",
			
 
				+    "Purpose": "Store/Local/File",
			
 
				+    "confidence": 1,
			
 
				+    "Script": ""
			
 
				+  }
			
 
				+]