瀏覽代碼

first commit

刘凡 2 年之前
當前提交
0cbe02b5ef
共有 100 個文件被更改,包括 572 次插入0 次删除
  1. 二進制
      .DS_Store
  2. 127 0
      Re 项目.md
  3. 20 0
      Templete.json
  4. 0 0
      accuracy/__init__.py
  5. 二進制
      accuracy/__pycache__/__init__.cpython-39.pyc
  6. 二進制
      accuracy/__pycache__/accuracytest.cpython-39.pyc
  7. 90 0
      accuracy/accuracytest.py
  8. 0 0
      algorithm/__init__.py
  9. 二進制
      algorithm/__pycache__/__init__.cpython-39.pyc
  10. 二進制
      algorithm/__pycache__/charactermatch.cpython-39.pyc
  11. 91 0
      algorithm/charactermatch.py
  12. 50 0
      algorithm/nlp.py
  13. 二進制
      analyze/.DS_Store
  14. 0 0
      analyze/__init__.py
  15. 二進制
      analyze/__pycache__/__init__.cpython-39.pyc
  16. 二進制
      analyze/__pycache__/outanalyze.cpython-39.pyc
  17. 二進制
      analyze/combine.xlsx
  18. 二進制
      analyze/hand2.xlsx
  19. 40 0
      analyze/outanalyze.py
  20. 二進制
      analyze/output/0-cmdb.xls
  21. 二進制
      analyze/output/1-tmp.xls
  22. 二進制
      analyze/output/1-upload.xls
  23. 二進制
      analyze/output/10-example-upload-public.xls
  24. 二進制
      analyze/output/11-test_upload.xls
  25. 二進制
      analyze/output/12-s3sendfile.xls
  26. 二進制
      analyze/output/13-s3.xls
  27. 二進制
      analyze/output/14-shutil.xls
  28. 二進制
      analyze/output/15-extract-censo.xls
  29. 二進制
      analyze/output/16-anscombes_quartet.xls
  30. 二進制
      analyze/output/17-annotate_simple01.xls
  31. 二進制
      analyze/output/18-views.xls
  32. 二進制
      analyze/output/19-test_hashids.xls
  33. 二進制
      analyze/output/2-sambaPipe.xls
  34. 二進制
      analyze/output/2.xls
  35. 二進制
      analyze/output/20-test_videohash.xls
  36. 二進制
      analyze/output/21-b_hashtables.xls
  37. 二進制
      analyze/output/22-photohash-master.xls
  38. 二進制
      analyze/output/23-Third_step.xls
  39. 二進制
      analyze/output/24-truncate.xls
  40. 二進制
      analyze/output/25-test_pseudonymizer.xls
  41. 二進制
      analyze/output/26-pyworkshop.xls
  42. 二進制
      analyze/output/28-miniprojects.xls
  43. 二進制
      analyze/output/29-spark-structured-streaming-window-udf-example.xls
  44. 二進制
      analyze/output/3-example.xls
  45. 二進制
      analyze/output/30-data-synthesis-for-machine-learning.xls
  46. 二進制
      analyze/output/31-hana-my-thai-star-data-generator.xls
  47. 二進制
      analyze/output/32-sambaPipe.xls
  48. 二進制
      analyze/output/33-cmscontrib.xls
  49. 二進制
      analyze/output/4-confluent_cloud.xls
  50. 二進制
      analyze/output/5-producer.xls
  51. 二進制
      analyze/output/6-producer.xls
  52. 二進制
      analyze/output/7-example.xls
  53. 二進制
      analyze/output/8-test_client.xls
  54. 二進制
      analyze/output/9-file_samples_hello_world.xls
  55. 二進制
      analyze/output/Convert_JSON_to_CSV.xls
  56. 二進制
      analyze/output/b_hashtables.xls
  57. 二進制
      analyze/output/cmdb-python-master-手工-2.xls
  58. 二進制
      analyze/output/cmdb-python-master-手工.xls
  59. 二進制
      analyze/output/cmdb-python-master-标准.xls
  60. 二進制
      analyze/output/cmdb-python-master.xls
  61. 二進制
      analyze/output/cmscontrib.xls
  62. 二進制
      analyze/output/data-synthesis-for-machine-learning.xls
  63. 二進制
      analyze/output/file_samples_hello_world.xls
  64. 二進制
      analyze/output/ghostpotato-master-X.xls
  65. 二進制
      analyze/output/hana-my-thai-star-data-generator.xls
  66. 二進制
      analyze/output/medical_data_visualizer.xls
  67. 二進制
      analyze/output/nnja-python.xls
  68. 二進制
      analyze/output/pseudonymizers.xls
  69. 二進制
      analyze/output/python-mini-projects-master.xls
  70. 二進制
      analyze/output/python-record-my-voice.xls
  71. 二進制
      analyze/output/pyworkshop.xls
  72. 二進制
      analyze/output/record-my-voice.xls
  73. 二進制
      analyze/output/roytuts-python.xls
  74. 二進制
      analyze/output/sambaPipe.xls
  75. 二進制
      analyze/output/save_historical_data.xls
  76. 二進制
      analyze/output/spark-structured-streaming-window-udf-example.xls
  77. 二進制
      analyze/output/test.xls
  78. 二進制
      analyze/output1-libs3-master.xlsx
  79. 二進制
      analyze/output2/Convert_JSON_to_CSV.xls
  80. 二進制
      analyze/output2/Instagram_profile.xls
  81. 二進制
      analyze/output2/Random_password_generator.xls
  82. 二進制
      analyze/output2/chapter2.xls
  83. 二進制
      analyze/output2/chapter4.xls
  84. 二進制
      analyze/output2/chapter7.xls
  85. 二進制
      analyze/output2/ds4ml.xls
  86. 二進制
      analyze/output2/fortest.xls
  87. 二進制
      analyze/output2/fortesthana.xls
  88. 二進制
      analyze/output2/mini.xls
  89. 二進制
      analyze/output2/python-record-my-voice.xls
  90. 二進制
      analyze/output2/roytuts-python.xls
  91. 二進制
      analyze/output2/spark-structured-streaming-window-udf-example.xls
  92. 二進制
      analyze/output2/src.xls
  93. 二進制
      analyze/output2/test.xls
  94. 二進制
      analyze/output2/validation.xlsx
  95. 二進制
      analyze/progarm.xlsx
  96. 二進制
      analyze/program2.xlsx
  97. 二進制
      analyze/~$combine.xlsx
  98. 38 0
      flaskBack.py
  99. 16 0
      graphgen.py
  100. 100 0
      history/program-azure-storage-blob.json

二進制
.DS_Store


+ 127 - 0
Re 项目.md

@@ -0,0 +1,127 @@
+Share
+Internal Share
+External Share
+w/ThirdParty
+
+w/ServiceProvider
+w/Consultant
+Store
+https://github.com/zenodo/zenodo/blob/7af3c9e57367d849f5151da68d8929cf5c0b9c7d/scripts/upload.py#L84
+Local
+File
+Directory
+https://github.com/Amzza0x00/ghostpotato/blob/master/examples/sambaPipe.py#L90
+Messaging System
+Kafka
+https://github.com/dpkp/kafka-python/blob/master/example.py#L20
+https://github.com/confluentinc/confluent-kafka-python/blob/master/examples/confluent_cloud.py#L98
+https://github.com/confluentinc/confluent-kafka-python/blob/master/examples/producer.py#L55
+https://github.com/owenliang/kafka/blob/master/producer.py
+NATS
+https://github.com/nats-io/nats.py/blob/main/examples/example.py
+https://github.com/Gr1N/nats-python/blob/master/tests/test_client.py#L111
+Cloud Service
+Azure
+https://github.com/MadTownMark/azure-storage-blob/blob/master/sdk/storage/azure-storage-file-share/samples/file_samples_hello_world.py#L75
+S3
+https://github.com/keithweaver/python-aws-s3/blob/master/example-upload-public.py
+https://github.com/boto/s3transfer/blob/develop/tests/integration/test_upload.py#L53
+https://github.com/nagwww/101-AWS-S3-Hacks/blob/master/s3sendfile.py#L18
+https://github.com/torchbox/christmas-video-2017/blob/master/xmasvideo/s3.py#L56
+https://github.com/pankajr141/libs3/blob/master/libs3/shutil.py#L111
+https://github.com/bruno990/igti-edc-desafiofinal/blob/main/extract-censo/extract-censo.py#L72
+Visualize
+https://github.com/mwaskom/seaborn/blob/master/examples/anscombes_quartet.py#L14
+https://github.com/matplotlib/matplotlib/blob/main/examples/userdemo/annotate_simple01.py#L12
+https://github.com/zachwill/flask-engine/blob/master/app/views.py#L28
+Archive
+Anonymize
+Hash
+https://github.com/davidaurelio/hashids-python/blob/master/test/test_hashids.py#L44
+https://github.com/akamhy/videohash/blob/main/tests/test_videohash.py#L17
+https://github.com/Dstar4/Hash-Tables/blob/master/basic_hashtable/b_hashtables.py#L92
+https://github.com/bunchesofdonald/photohash
+Truncate
+https://github.com/ojitha/code-challenge-1/blob/main/Third_step.py#L38
+https://github.com/beerfleet/udemy_tutorial/blob/master/Oefeningen/uDemy/bootcamp/lots_of_exercises/truncate.py#L18
+Pseudonym
+https://github.com/prechelt/pseudonymizer/blob/master/pseudonymizer/tests/test_pseudonymizer.py#L10
+https://github.wdf.sap.corp/ICN-Nanjing-Projects/Data-Anonymization/blob/master/ds4ml/command/synthesize.py#L78
+
+
+
+
+
+1. Kafka s3 nats  没有
+
+
+
+
+
+https://github.com/zenodo/zenodo
+
+https://github.com/Amzza0x00/ghostpotato
+
+ 
+
+https://github.com/dpkp/kafka-python
+
+https://github.com/confluentinc/confluent-kafka-python/blob/master/examples/
+
+https://github.com/owenliang/kafka
+
+ 
+
+https://github.com/nats-io/nats.py/blob/main/examples
+
+https://github.com/Gr1N/nats-python/blob/master/tests/
+
+ 
+
+https://github.com/MadTownMark/azure-storage-blob/blob/master/sdk/storage/azure-storage-file-share/samples/
+
+ 
+
+https://github.com/keithweaver/python-aws-s3/blob/master/example-upload-public.py
+
+https://github.com/boto/s3transfer/blob/develop/tests/integration/test_upload.py
+
+https://github.com/nagwww/101-AWS-S3-Hacks/blob/master/s3sendfile.py
+
+https://github.com/torchbox/christmas-video-2017/blob/master/xmasvideo/s3.py
+
+https://github.com/pankajr141/libs3/blob/master/libs3/shutil.py
+
+https://github.com/bruno990/igti-edc-desafiofinal/blob/main/extract-censo/extract-censo.py
+
+ 
+
+https://github.com/mwaskom/seaborn/blob/master/examples/anscombes_quartet.py
+
+https://github.com/matplotlib/matplotlib/blob/main/examples/userdemo/annotate_simple01.py
+
+https://github.com/zachwill/flask-engine/blob/master/app/views.py
+
+ 
+
+https://github.com/davidaurelio/hashids-python/blob/master/test/test_hashids.py
+
+https://github.com/akamhy/videohash/blob/main/tests/test_videohash.py
+
+https://github.com/Dstar4/Hash-Tables/blob/master/basic_hashtable/b_hashtables.py
+
+https://github.com/bunchesofdonald/photohash
+
+ 
+
+https://github.com/ojitha/code-challenge-1/blob/main/Third_step.py
+
+https://github.com/beerfleet/udemy_tutorial/blob/master/Oefeningen/uDemy/bootcamp/lots_of_exercises/truncate.py
+
+ 
+
+https://github.com/prechelt/pseudonymizer/blob/master/pseudonymizer/tests/test_pseudonymizer.py
+
+https://github.wdf.sap.corp/ICN-Nanjing-Projects/Data-Anonymization/blob/master/ds4ml/command/synthesize.py
+
+ 

+ 20 - 0
Templete.json

@@ -0,0 +1,20 @@
+[
+    {
+      "Location":"cmdb/views/controller/ambientController.py 159",
+      "DataType": "UserName",
+      "Purpose": "Usage",
+      "Note":""
+    },
+    {
+      "Location":"cmdb/views/controller/ambientController.py 159",
+      "DataType": "EmailAddredss",
+      "Purpose": "Usage",
+      "Note":""
+    },
+    {
+      "Location":"cmdb/views/controller/ambientController.py 159",
+      "DataType": "PhoneNumber",
+      "Purpose": "Usage",
+      "Note":""
+    }
+]

+ 0 - 0
accuracy/__init__.py


二進制
accuracy/__pycache__/__init__.cpython-39.pyc


二進制
accuracy/__pycache__/accuracytest.cpython-39.pyc


+ 90 - 0
accuracy/accuracytest.py

@@ -0,0 +1,90 @@
+import xlrd
+
+from utils.fileio import load_location, load_data_purpose_split, list_to_excel
+
+
+def test_recall_accuracy(suspected_node_list, source):
+    """
+    Compare scanner findings against the reference table and print recall/precision.
+
+    Recall = (relevant items retrieved) / (relevant items present in the reference).
+
+    Each node is keyed by ``(path relative to source with '/' separators, line_no)``
+    and looked up in the dict returned by ``load_location``.
+
+    :param suspected_node_list: nodes exposing ``file_path``, ``line_no``, ``purpose``,
+        ``private_info`` and ``private_word_list``; a node whose ``private_info`` is
+        ``None`` has it filled in place from ``private_word_list``.
+    :param source: root directory of the scanned project (either separator style).
+    :return: dict with ``recall_accurate`` (location and labels both match),
+        ``recall_location`` (location matches) and ``location_num`` (reference size).
+    """
+    location_dict = load_location("项目校对表-旧.xlsx")
+    location_num = len(location_dict)
+    recall_location = 0
+    recall_accurate = 0
+    # Normalise the root once so the prefix can be stripped from both Windows
+    # ("C:\\proj\\a.py") and POSIX ("/proj/a.py") style paths.
+    source_prefix = source.replace('\\', '/') + '/'
+    print("准确的结果如下:")
+    for node in suspected_node_list:
+        if node.private_info is None:
+            node.private_info = [(key_word[0], node.purpose) for key_word in node.private_word_list]
+        print((node.file_path.replace(source + '\\', ''), node.line_no, node.private_info,node.purpose))
+        # Build the lookup key a single time instead of recomputing it per use.
+        key = (node.file_path.replace('\\', '/').replace(source_prefix, ''), node.line_no)
+        if key in location_dict:
+            recall_location += 1
+            if node.private_info == location_dict[key]:
+                recall_accurate += 1
+    # recall_location > 0 implies both denominators below are non-zero.
+    if recall_location > 0:
+        print("查全率为: ", recall_accurate, "/", recall_location, '/', location_num, '/', recall_location / location_num)
+        print("查准率为: ", recall_accurate, "/", recall_location, '/', len(suspected_node_list), '/',
+              recall_location / len(suspected_node_list))
+    return {"recall_accurate": recall_accurate, "recall_location": recall_location, "location_num": location_num}
+
+
+def test_missed(suspected_node_list, source):
+    """
+    Report, for every reference location, whether the scanner hit or missed it.
+
+    :param suspected_node_list: nodes exposing ``file_path`` and ``line_no``.
+    :param source: root directory of the scanned project (either separator style).
+    :return: dict with ``suspected_node_list`` (at most the first two
+        ``(relative_path, line_no)`` keys, as a preview) and ``missed`` (one
+        "命中:"/"未命中:" message per reference location).
+    """
+    location_dict = load_location("项目校对表-旧.xlsx")
+    # Normalise separators before stripping the root so POSIX paths work as well.
+    source_prefix = source.replace('\\', '/') + '/'
+    paths = [(node.file_path.replace('\\', '/').replace(source_prefix, ''), node.line_no)
+             for node in suspected_node_list]
+    found = set(paths)  # constant-time membership instead of scanning the list per key
+    res = []
+    for location in location_dict:
+        label = "命中:" if location in found else "未命中:"
+        res.append(label + location[0] + str(location[1]))
+    # Slicing never raises, unlike indexing paths[0]/paths[1] on a short list.
+    return {"suspected_node_list": paths[:2], "missed": res}
+
+
+def test_stamp(stamp,
+               reference_file="/Users/liufan/program/PYTHON/SAP/privacyScanLsn/项目校对表-旧.xlsx",
+               output_file=r'analyze/output/cmdb-python-master-标准.xls'):
+    """
+    Compare computed (location, data type) / (location, purpose) pairs with a reference.
+
+    Prints every reference pair that was not computed, prints the ratio
+    ``len(computed) / len(computed ∪ reference)`` for both dimensions, and writes
+    the two unions out through ``list_to_excel``.
+
+    :param stamp: iterable of ``(path, line_no, [(data_type, purpose), ...])`` entries.
+    :param reference_file: workbook handed to ``load_data_purpose_split``; defaults
+        to the path that was previously hard-coded.
+    :param output_file: destination handed to ``list_to_excel``; defaults to the
+        path that was previously hard-coded.
+    """
+    data_type_compute = []
+    purpose_compute = []
+    for st in stamp:
+        loc = st[0] + ' ' + str(st[1])
+        # The generic labels 'Data' and 'Usage' are excluded from the comparison.
+        datatype = {data[0] for data in st[2] if data[0] != 'Data'}
+        purpose = {data[1] for data in st[2] if data[1] != 'Usage'}
+        data_type_compute.extend((loc, dt) for dt in datatype)
+        purpose_compute.extend((loc, pur) for pur in purpose)
+
+    data_type_list, purpose_list = load_data_purpose_split(reference_file)
+
+    computed_types = set(data_type_compute)
+    for data in data_type_list:
+        if data not in computed_types:
+            print(data)
+
+    computed_purposes = set(purpose_compute)
+    for pur in purpose_list:
+        if pur not in computed_purposes:
+            print(pur)
+
+    data_type_all = list(set(data_type_compute + data_type_list))
+    purpose_all = list(set(purpose_list + purpose_compute))
+
+    # Guard the ratios: with nothing computed and an empty reference the union is empty.
+    data_type_ratio = len(data_type_compute) / len(data_type_all) if data_type_all else 0.0
+    purpose_ratio = len(purpose_compute) / len(purpose_all) if purpose_all else 0.0
+    print("data_type准确率为: ", len(data_type_compute), "/", len(data_type_all),
+          data_type_ratio)
+    print("purpose准确率为: ", len(purpose_compute), "/", len(purpose_all),
+          purpose_ratio)
+
+    list_to_excel(output_file, data_type_all, purpose_all)
+
+
+if __name__ == '__main__':
+    # load_location("项目校对表.xlsx")
+    # test_stamp requires a `stamp` argument; calling it bare raised TypeError.
+    # An empty stamp list gives a baseline run that lists every reference entry
+    # as not computed.
+    test_stamp([])

+ 0 - 0
algorithm/__init__.py


二進制
algorithm/__pycache__/__init__.cpython-39.pyc


二進制
algorithm/__pycache__/charactermatch.cpython-39.pyc


+ 91 - 0
algorithm/charactermatch.py

@@ -0,0 +1,91 @@
+import copy
+import difflib
+# import Levenshtein
+
+
+# duplicated
+def character_match_abbr(word_std, abbr, word):
+    """
+    Abbreviation-aware match of ``word`` against ``word_std`` / ``abbr``.
+
+    Returns True when ``word_std`` occurs inside ``word``. Otherwise the word is
+    scanned from each occurrence of the abbreviation's first character: three
+    consecutive characters of the word must each be found, in order, in the
+    remaining tail of ``abbr``. Returns False when no start position succeeds.
+    """
+    if word_std in word:
+        return True
+    head = abbr[0]
+    start = word.find(head)
+    while start != -1 and start + 3 <= len(word):
+        word = word[start:]
+        remaining = abbr
+        matched = True
+        for _ in range(3):
+            pos = remaining.find(word[0])
+            if pos == -1:
+                matched = False
+                break
+            # Keep the matched character so repeated letters can match again.
+            remaining = remaining[pos:]
+            word = word[1:]
+        if matched:
+            return True
+        # Retry from the next occurrence in what is left of the word.
+        start = word.find(head)
+    return False
+
+
+def character_match(word_std, word):
+    """
+    Fuzzy match of an identifier against a standard keyword.
+
+    Both strings are lower-cased and stripped of underscores before comparison,
+    so a keyword such as "pass_word" can match "user_pass_word".
+
+    Args:
+        word_std: the standard keyword (or abbreviation) to look for.
+        word: the identifier being checked.
+
+    Returns:
+        True if the normalised keyword occurs inside the normalised identifier,
+        or if their ``difflib.SequenceMatcher`` ratio is above 0.9; else False.
+    """
+    word = word.lower().replace("_", "")
+    lowered_std = word_std.lower()
+    # Fall back to the un-stripped keyword when it consists only of underscores;
+    # an empty keyword would otherwise match every identifier.
+    word_std = lowered_std.replace("_", "") or lowered_std
+    if word_std in word:
+        return True
+    similarity = difflib.SequenceMatcher((lambda x: x in ["_", "/"]), word, word_std).ratio()
+    return similarity > 0.9
+
+
+def word_match(word_std_list, word):
+    """
+    Check whether ``word`` matches any keyword in ``word_std_list``.
+
+    The keyword "ip" is special-cased: it only matches when ``word`` is exactly
+    "ip", "IP" or "Ip", and is left out of the fuzzy comparison.
+
+    Args:
+        word_std_list: candidate keywords / abbreviations. This list is not
+            modified, so the same list can be reused across calls.
+        word: the identifier being checked.
+
+    Returns:
+        True/False
+    """
+    # Work on a copy: removing "ip" from the caller's list would silently drop
+    # the rule for every later call that shares that list.
+    candidates = list(word_std_list)
+    if "ip" in candidates:
+        candidates.remove("ip")
+        if word in ("ip", "IP", "Ip"):
+            return True
+    return any(character_match(word_std, word) for word_std in candidates)
+
+
+def test_match(a, b):
+    """Print whether ``a`` occurs in ``b``, then their SequenceMatcher ratio, then a blank line."""
+    contained = b.find(a) != -1
+    print(contained)
+    matcher = difflib.SequenceMatcher((lambda x: x in ["_", "/"]), a, b)
+    print(matcher.ratio())
+
+    print()
+
+
+if __name__ == '__main__':
+    # Manual smoke checks; the commented-out calls are earlier experiments.
+    # print(word_match(["password", "pwd", "psw", "pswd"], "psd"))
+    # print(word_match(["password", "pwd", "psw", "pswd"], "userpwd"))
+    # print(word_match(["password", "pwd", "psw", "pswd"], "user_psw_1"))
+    # print(word_match(["password", "pwd", "psw", "pswd"], "pwa"))
+    # print(word_match(["password", "pwd", "psw", "pswd"], "passw"))
+    # print(word_match(["password", "pwd", "psw", "pswd"], "passpsw"))
+    # print(word_match(["password", "pwd", "psw", "pswd"], "user_password_a"))
+    # print(word_match(["password", "pwd", "psw", "pswd"], "psw_a"))
+    # The next two calls discard their return value; only the last one is printed.
+    word_match(["pswd", "psw", "pwd", "password", "pass_word", "gitpass"], "gen_password")
+    word_match(["key"], "gitkey")
+    print(word_match(["Pseudonym", "alias"], "pseudonyms"))
+    # word_match(["ipaddr", "IPAddress", "ip"], "output_dir")
+    # word_match(["ipaddr", "IPAddress", "ip"], "os.path.pardir")
+# 包含+长度限制

+ 50 - 0
algorithm/nlp.py

@@ -0,0 +1,50 @@
+import torch
+from transformers import BertModel, BertTokenizer
+import numpy as np
+import time
+
+
+def get_word_vec(word):
+    """
+    Encode a batch of inputs with ``bert-base-uncased`` and return the last hidden layer.
+
+    Each element of ``word`` is passed to ``tokenizer.encode`` on its own. The
+    resulting id lists are right-padded to a common length and an attention
+    mask is supplied, so inputs that tokenize to different lengths can share a
+    batch. When all inputs have the same length the mask is all ones.
+
+    Args:
+        word: iterable of inputs accepted by ``BertTokenizer.encode``.
+
+    Returns:
+        The first element of the model output (the last hidden states), with one
+        entry per input along the first dimension.
+    """
+    # bert-base model; its vocabulary is lower-cased.
+    model_name = 'bert-base-uncased'
+    # Load the tokenizer that belongs to the model.
+    tokenizer = BertTokenizer.from_pretrained(model_name)
+    # Load the model weights.
+    model = BertModel.from_pretrained(model_name)
+    # Turn every input into a list of token ids.
+    encoded = [tokenizer.encode(input_text_i) for input_text_i in word]
+    # torch.tensor needs rectangular data: pad shorter sequences and mask the padding.
+    width = max((len(ids) for ids in encoded), default=0)
+    pad_id = tokenizer.pad_token_id
+    input_ids = torch.tensor([ids + [pad_id] * (width - len(ids)) for ids in encoded])
+    attention_mask = torch.tensor([[1] * len(ids) + [0] * (width - len(ids)) for ids in encoded])
+    print(input_ids)
+    # Inference only: no gradients are needed for the last hidden layer.
+    with torch.no_grad():
+        last_hidden_states = model(input_ids, attention_mask=attention_mask)[0]  # model outputs are tuples
+    print(last_hidden_states)
+    print(last_hidden_states.shape)
+    # Example from the original notes: one 9-token input gave shape (1, 9, 768).
+    return last_hidden_states
+
+
+def get_cos_similar(v1: list, v2: list):
+    """
+    Cosine similarity of two vectors rescaled to 0.5 + 0.5 * cos.
+
+    Returns 0 when the product of the two norms is zero.
+    """
+    dot_product = float(np.dot(v1, v2))
+    norm_product = np.linalg.norm(v1) * np.linalg.norm(v2)
+    if norm_product == 0:
+        return 0
+    return 0.5 + 0.5 * (dot_product / norm_product)
+
+
+if __name__ == '__main__':
+    # Demo: embed two inputs, each given as a list of two strings.
+    vecs = get_word_vec([["psw", "name"], ["git", "name"]])
+    # Compare the vector at position 0 of each sequence
+    # (presumably the [CLS] position; confirm against the tokenizer output).
+    print(get_cos_similar(vecs[0][0], vecs[1][0]))

二進制
analyze/.DS_Store


+ 0 - 0
analyze/__init__.py


二進制
analyze/__pycache__/__init__.cpython-39.pyc


二進制
analyze/__pycache__/outanalyze.cpython-39.pyc


二進制
analyze/combine.xlsx


二進制
analyze/hand2.xlsx


+ 40 - 0
analyze/outanalyze.py

@@ -0,0 +1,40 @@
+import xlwt
+
+from utils.fileio import load_location, write_json
+
+
+def out_analyze(node_list, source, save_file: str, entire=False):
+    """
+    Write one spreadsheet row per (data type, purpose) pair of every node.
+
+    Columns are Location, Function, DataType, Purpose; the Function column is
+    left out when ``entire`` is true. Location is "<file name>#L<line_no>".
+    Falsy data types, purposes and function names are written as "None"; the
+    function-name default is also stored back on the node.
+
+    :param node_list: nodes exposing ``file_path``, ``line_no``, ``func_name``
+        and an iterable ``private_info`` of ``(data_type, purpose)`` pairs.
+    :param source: root directory of the scanned project.
+    :param save_file: path of the workbook to write.
+    :param entire: when true, omit the Function column.
+    """
+    book = xlwt.Workbook(encoding='utf-8')
+    sheet = book.add_sheet("DataType")
+
+    if entire:
+        cols = ["Location", "DataType", "Purpose"]
+    else:
+        cols = ["Location", "Function", "DataType", "Purpose"]
+
+    for col_idx, title in enumerate(cols):
+        sheet.write(0, col_idx, title)
+
+    tmp_row = 1
+    for node in node_list:
+        normalized_path = node.file_path.replace('\\', '/')
+        source_prefix = source.replace('\\', '/') + '/'
+        file_name = normalized_path.replace(source_prefix, '').split('/')[-1]
+        location = file_name + "#L" + str(node.line_no)
+
+        for data_type, purpose in node.private_info:
+            data_type = data_type or "None"
+            purpose = purpose or "None"
+            if not node.func_name:
+                node.func_name = "None"
+            if entire:
+                row_values = [location, data_type, purpose]
+            else:
+                row_values = [location, node.func_name, data_type, purpose]
+            for col_idx, value in enumerate(row_values):
+                sheet.write(tmp_row, col_idx, value)
+            tmp_row += 1
+
+    book.save(save_file)

二進制
analyze/output/0-cmdb.xls


二進制
analyze/output/1-tmp.xls


二進制
analyze/output/1-upload.xls


二進制
analyze/output/10-example-upload-public.xls


二進制
analyze/output/11-test_upload.xls


二進制
analyze/output/12-s3sendfile.xls


二進制
analyze/output/13-s3.xls


二進制
analyze/output/14-shutil.xls


二進制
analyze/output/15-extract-censo.xls


二進制
analyze/output/16-anscombes_quartet.xls


二進制
analyze/output/17-annotate_simple01.xls


二進制
analyze/output/18-views.xls


二進制
analyze/output/19-test_hashids.xls


二進制
analyze/output/2-sambaPipe.xls


二進制
analyze/output/2.xls


二進制
analyze/output/20-test_videohash.xls


二進制
analyze/output/21-b_hashtables.xls


二進制
analyze/output/22-photohash-master.xls


二進制
analyze/output/23-Third_step.xls


二進制
analyze/output/24-truncate.xls


二進制
analyze/output/25-test_pseudonymizer.xls


二進制
analyze/output/26-pyworkshop.xls


二進制
analyze/output/28-miniprojects.xls


二進制
analyze/output/29-spark-structured-streaming-window-udf-example.xls


二進制
analyze/output/3-example.xls


二進制
analyze/output/30-data-synthesis-for-machine-learning.xls


二進制
analyze/output/31-hana-my-thai-star-data-generator.xls


二進制
analyze/output/32-sambaPipe.xls


二進制
analyze/output/33-cmscontrib.xls


二進制
analyze/output/4-confluent_cloud.xls


二進制
analyze/output/5-producer.xls


二進制
analyze/output/6-producer.xls


二進制
analyze/output/7-example.xls


二進制
analyze/output/8-test_client.xls


二進制
analyze/output/9-file_samples_hello_world.xls


二進制
analyze/output/Convert_JSON_to_CSV.xls


二進制
analyze/output/b_hashtables.xls


二進制
analyze/output/cmdb-python-master-手工-2.xls


二進制
analyze/output/cmdb-python-master-手工.xls


二進制
analyze/output/cmdb-python-master-标准.xls


二進制
analyze/output/cmdb-python-master.xls


二進制
analyze/output/cmscontrib.xls


二進制
analyze/output/data-synthesis-for-machine-learning.xls


二進制
analyze/output/file_samples_hello_world.xls


二進制
analyze/output/ghostpotato-master-X.xls


二進制
analyze/output/hana-my-thai-star-data-generator.xls


二進制
analyze/output/medical_data_visualizer.xls


二進制
analyze/output/nnja-python.xls


二進制
analyze/output/pseudonymizers.xls


二進制
analyze/output/python-mini-projects-master.xls


二進制
analyze/output/python-record-my-voice.xls


二進制
analyze/output/pyworkshop.xls


二進制
analyze/output/record-my-voice.xls


二進制
analyze/output/roytuts-python.xls


二進制
analyze/output/sambaPipe.xls


二進制
analyze/output/save_historical_data.xls


二進制
analyze/output/spark-structured-streaming-window-udf-example.xls


二進制
analyze/output/test.xls


二進制
analyze/output1-libs3-master.xlsx


二進制
analyze/output2/Convert_JSON_to_CSV.xls


二進制
analyze/output2/Instagram_profile.xls


二進制
analyze/output2/Random_password_generator.xls


二進制
analyze/output2/chapter2.xls


二進制
analyze/output2/chapter4.xls


二進制
analyze/output2/chapter7.xls


二進制
analyze/output2/ds4ml.xls


二進制
analyze/output2/fortest.xls


二進制
analyze/output2/fortesthana.xls


二進制
analyze/output2/mini.xls


二進制
analyze/output2/python-record-my-voice.xls


二進制
analyze/output2/roytuts-python.xls


二進制
analyze/output2/spark-structured-streaming-window-udf-example.xls


二進制
analyze/output2/src.xls


二進制
analyze/output2/test.xls


二進制
analyze/output2/validation.xlsx


二進制
analyze/progarm.xlsx


二進制
analyze/program2.xlsx


二進制
analyze/~$combine.xlsx


+ 38 - 0
flaskBack.py

@@ -0,0 +1,38 @@
+from flask import Flask, request
+from interface import annotate
+from flask_cors import CORS
+from flask import jsonify
+from utils.fileio import load_json
+
+# Flask application object; "PrivacyScan" is passed as its import name.
+app = Flask("PrivacyScan")
+
+# Enable CORS for the /scan route only, accepting requests from any origin ("*").
+cors = CORS(app, resources={r"/scan": {"origins": "*"}})
+
+
+@app.route("/scan", methods=['POST'])
+def scan():
+    """
+    Run the privacy annotation for the project named in the request.
+
+    Expects a JSON object body with a ``source`` field. Loads the data-type and
+    purpose dictionaries, calls ``annotate(source, lattice, False)`` and returns
+    its result as JSON.
+
+    Returns HTTP 400 with an ``error`` message when the body is not a JSON
+    object or has no ``source`` field, instead of failing with an unhandled
+    exception.
+    """
+    # Parse the body once; silent=True yields None for a missing/invalid JSON body.
+    payload = request.get_json(silent=True)
+    if not isinstance(payload, dict) or 'source' not in payload:
+        return jsonify({'error': "request body must be a JSON object with a 'source' field"}), 400
+    # NOTE(review): `source` comes from the client and is passed on unchanged;
+    # confirm that annotate() restricts which locations may be scanned.
+    source = payload['source']
+    print(payload)
+    data_type = load_json('lattices/datatype_dictionary.json')
+    purpose_dict = load_json('lattices/purpose_dictionary.json')
+    lattice = {'dataType': data_type, 'purpose': purpose_dict}
+
+    result = annotate(source, lattice,
+             False)
+    # Sample result that was left here as a commented-out stub:
+    # result = {
+    #     'accuracy': {
+    #         'recall_accurate': 10,
+    #         'recall_location': 128,
+    #         'location_num': 158
+    #     },
+    #     'missed': {
+    #         'suspected_node_list': ["first file, line 1", "first file, line 2"],
+    #         'missed': ["hit: first file, line 1", "missed: second file, line 3"]
+    #     }
+    # }
+    print(source)
+    return jsonify(result)
+
+
+if __name__ == '__main__':
+    # Start the app with Flask's default run() settings when executed as a script.
+    app.run()

+ 16 - 0
graphgen.py

@@ -0,0 +1,16 @@
+import graphviz
+import pyan
+
+from utils import fileio
+
+# Earlier experiment: build the call graph from two hand-picked files only.
+# root_dir1 = "/Users/liufan/program/PYTHON/SAP/PrivacyScan/systementrance.py"
+# root_dir2 = "/Users/liufan/program/PYTHON/SAP/PrivacyScan/utils/fileio.py"
+# root_list = [root_dir2, root_dir1]
+
+# NOTE(review): absolute path to one developer's checkout; this runs at import time.
+source_dir = "/Users/liufan/program/PYTHON/SAP/PrivacyScan"
+# Collect the files under source_dir via the project's fileio helper.
+file_list = fileio.walk_files_path(source_dir)
+
+# Ask pyan for the call graph of those files in Graphviz "dot" format.
+res = pyan.create_callgraph(file_list, format="dot")
+
+# Wrap the dot text in a graphviz Source and open it in a viewer.
+graph = graphviz.Source(res)
+graph.view()

+ 100 - 0
history/program-azure-storage-blob.json

@@ -0,0 +1,100 @@
+[
+  {
+    "Location": "D:\\Download\\azure-storage-blob-master\\sdk\\storage\\azure-storage-file-share\\samples\\file_samples_authentication.py 47",
+    "DataType": "key",
+    "Purpose": "Share/ExternalShare/ServiceProvider",
+    "confidence": 1,
+    "Script": ""
+  },
+  {
+    "Location": "D:\\Download\\azure-storage-blob-master\\sdk\\storage\\azure-storage-file-share\\samples\\file_samples_authentication.py 51",
+    "DataType": "key",
+    "Purpose": "Share/ExternalShare/ServiceProvider",
+    "confidence": 1,
+    "Script": ""
+  },
+  {
+    "Location": "D:\\Download\\azure-storage-blob-master\\sdk\\storage\\azure-storage-file-share\\samples\\file_samples_authentication.py 66",
+    "DataType": "UserName",
+    "Purpose": "Usage",
+    "confidence": 1,
+    "Script": ""
+  },
+  {
+    "Location": "D:\\Download\\azure-storage-blob-master\\sdk\\storage\\azure-storage-file-share\\samples\\file_samples_authentication.py 66",
+    "DataType": "key",
+    "Purpose": "Usage",
+    "confidence": 1,
+    "Script": ""
+  },
+  {
+    "Location": "D:\\Download\\azure-storage-blob-master\\sdk\\storage\\azure-storage-file-share\\samples\\file_samples_client.py 102",
+    "DataType": "UserName",
+    "Purpose": "Store/Local/File",
+    "confidence": 1,
+    "Script": ""
+  },
+  {
+    "Location": "D:\\Download\\azure-storage-blob-master\\sdk\\storage\\azure-storage-file-share\\samples\\file_samples_client.py 94",
+    "DataType": "UserName",
+    "Purpose": "Store/Local/File",
+    "confidence": 1,
+    "Script": ""
+  },
+  {
+    "Location": "D:\\Download\\azure-storage-blob-master\\sdk\\storage\\azure-storage-file-share\\samples\\file_samples_hello_world.py 41",
+    "DataType": "UserName",
+    "Purpose": "Share/ExternalShare/ServiceProvider",
+    "confidence": 1,
+    "Script": ""
+  },
+  {
+    "Location": "D:\\Download\\azure-storage-blob-master\\sdk\\storage\\azure-storage-file-share\\samples\\file_samples_hello_world.py 44",
+    "DataType": "UserName",
+    "Purpose": "Share/ExternalShare/ServiceProvider",
+    "confidence": 1,
+    "Script": ""
+  },
+  {
+    "Location": "D:\\Download\\azure-storage-blob-master\\sdk\\storage\\azure-storage-file-share\\samples\\file_samples_hello_world.py 48",
+    "DataType": "UserName",
+    "Purpose": "Share/ExternalShare/ServiceProvider",
+    "confidence": 1,
+    "Script": ""
+  },
+  {
+    "Location": "D:\\Download\\azure-storage-blob-master\\sdk\\storage\\azure-storage-file-share\\samples\\file_samples_hello_world.py 58",
+    "DataType": "UserName",
+    "Purpose": "Share/ExternalShare/ServiceProvider",
+    "confidence": 1,
+    "Script": ""
+  },
+  {
+    "Location": "D:\\Download\\azure-storage-blob-master\\sdk\\storage\\azure-storage-file-share\\samples\\file_samples_hello_world.py 61",
+    "DataType": "UserName",
+    "Purpose": "Share/ExternalShare/ServiceProvider",
+    "confidence": 1,
+    "Script": ""
+  },
+  {
+    "Location": "D:\\Download\\azure-storage-blob-master\\sdk\\storage\\azure-storage-file-share\\samples\\file_samples_hello_world.py 67",
+    "DataType": "UserName",
+    "Purpose": "Store/Local/File",
+    "confidence": 1,
+    "Script": ""
+  },
+  {
+    "Location": "D:\\Download\\azure-storage-blob-master\\sdk\\storage\\azure-storage-file-share\\samples\\file_samples_hello_world.py 75",
+    "DataType": "UserName",
+    "Purpose": "Store/Local/File",
+    "confidence": 1,
+    "Script": ""
+  },
+  {
+    "Location": "D:\\Download\\azure-storage-blob-master\\sdk\\storage\\azure-storage-file-share\\samples\\file_samples_service.py 78",
+    "DataType": "UserName",
+    "Purpose": "Store/Local/File",
+    "confidence": 1,
+    "Script": ""
+  }
+]

部分文件因文件數量過多而無法顯示