Browse Source

更新训练数据

刘凡 2 years ago
parent
commit
b8ad6e27b3

File diff suppressed because it is too large
+ 0 - 1472
data/traindata/test/Directory.txt


File diff suppressed because it is too large
+ 253 - 779
data/traindata/test/Hash.txt


+ 340 - 306
data/traindata/test/Pseudonym.txt

@@ -1,47 +1,51 @@
 1 48 1 2 10
 2 10 1 3 9
-1 48 1 4 12
-4 12 1 5 77
-5 77 1 6 93
-4 12 1 7 26
-7 26 1 8 17
-8 17 1 9 77
-9 77 1 10 69
-8 17 1 10 69
-7 26 1 12 31
-7 26 1 13 77
-13 77 1 10 69
-1 48 1 15 87
-15 87 1 16 77
-16 77 1 10 69
-? 4 Pseudonym/Data-Masking/redact_address_2.py
-
-1 48 1 2 10
-2 10 1 3 9
-1 48 1 4 87
-4 87 1 5 26
-5 26 1 6 17
-6 17 1 7 77
-7 77 1 8 69
-6 17 1 8 69
-5 26 1 10 26
-10 26 1 11 17
-11 17 1 12 77
-12 77 1 8 69
-11 17 1 8 69
-10 26 1 15 66
-15 66 1 16 10
-16 10 1 17 9
-15 66 1 18 26
-18 26 1 19 17
-19 17 1 20 77
-20 77 1 8 69
-19 17 1 8 69
-18 26 1 23 31
-18 26 1 24 31
-18 26 1 25 77
-25 77 1 8 69
-? 4 Pseudonym/Data-Masking/redact_emails_1.py
+2 10 1 4 9
+1 48 1 5 12
+5 12 1 6 77
+6 77 1 7 93
+5 12 1 8 67
+8 67 1 9 69
+1 48 1 10 54
+10 54 1 11 29
+11 29 1 12 17
+12 17 1 13 77
+13 77 1 9 69
+12 17 1 9 69
+11 29 1 16 62
+11 29 1 17 31
+10 54 1 18 46
+18 46 1 19 77
+19 77 1 7 93
+18 46 1 21 17
+21 17 1 22 77
+22 77 1 9 69
+21 17 1 9 69
+18 46 1 25 54
+25 54 1 26 29
+26 29 1 27 77
+27 77 1 9 69
+26 29 1 29 82
+26 29 1 30 17
+30 17 1 31 77
+31 77 1 9 69
+30 17 1 9 69
+25 54 1 34 42
+34 42 1 35 26
+35 26 1 36 17
+36 17 1 37 77
+37 77 1 9 69
+36 17 1 9 69
+35 26 1 40 77
+40 77 1 9 69
+10 54 1 42 12
+42 12 1 43 17
+43 17 1 44 77
+44 77 1 9 69
+43 17 1 7 93
+42 12 1 47 77
+47 77 1 9 69
+? 4 Pseudonym/dataFrameAnonymizer_1.py
 
 1 48 1 2 10
 2 10 1 3 9
@@ -206,23 +210,6 @@
 
 1 48 1 2 10
 2 10 1 3 9
-1 48 1 4 87
-4 87 1 5 26
-5 26 1 6 17
-6 17 1 7 77
-7 77 1 8 69
-6 17 1 8 69
-5 26 1 10 26
-10 26 1 11 17
-11 17 1 12 77
-12 77 1 8 69
-11 17 1 8 69
-10 26 1 15 77
-15 77 1 8 69
-? 4 Pseudonym/Data-Masking/redact_numerics_2.py
-
-1 48 1 2 10
-2 10 1 3 9
 1 48 1 4 12
 4 12 1 5 77
 5 77 1 6 93
@@ -231,261 +218,308 @@
 8 17 1 9 77
 9 77 1 10 69
 8 17 1 10 69
-7 26 1 12 66
-12 66 1 13 10
-13 10 1 14 9
-12 66 1 15 26
-15 26 1 16 17
-16 17 1 17 77
-17 77 1 10 69
-16 17 1 10 69
-15 26 1 20 31
-15 26 1 21 31
-15 26 1 22 77
-22 77 1 10 69
-1 48 1 24 12
-24 12 1 25 77
-25 77 1 6 93
-24 12 1 27 26
-27 26 1 28 17
-28 17 1 29 77
+7 26 1 12 31
+7 26 1 13 31
+7 26 1 14 77
+14 77 1 10 69
+1 48 1 16 12
+16 12 1 17 77
+17 77 1 6 93
+16 12 1 19 26
+19 26 1 20 17
+20 17 1 21 77
+21 77 1 10 69
+20 17 1 10 69
+19 26 1 24 31
+19 26 1 25 31
+19 26 1 26 77
+26 77 1 10 69
+1 48 1 28 87
+28 87 1 29 77
 29 77 1 10 69
-28 17 1 10 69
-27 26 1 32 66
-32 66 1 33 10
-33 10 1 34 9
-32 66 1 35 26
-35 26 1 36 17
-36 17 1 37 77
-37 77 1 10 69
-36 17 1 10 69
-35 26 1 40 31
-35 26 1 41 31
-35 26 1 42 77
-42 77 1 10 69
-1 48 1 44 12
-44 12 1 45 77
-45 77 1 6 93
-44 12 1 47 26
-47 26 1 48 17
-48 17 1 49 77
-49 77 1 10 69
-48 17 1 10 69
-47 26 1 52 66
-52 66 1 53 10
-53 10 1 54 9
-52 66 1 55 26
-55 26 1 56 17
-56 17 1 57 77
-57 77 1 10 69
-56 17 1 10 69
-55 26 1 60 31
-55 26 1 61 31
-55 26 1 62 77
-62 77 1 10 69
-1 48 1 64 87
-64 87 1 65 77
-65 77 1 10 69
-? 4 Pseudonym/Data-Masking/redact_datetime_1.py
+? 4 Pseudonym/Data-Masking/redact_numerics_1.py
 
 1 48 1 2 10
-1 48 1 3 12
-3 12 1 4 77
-4 77 1 5 93
-3 12 1 6 67
-6 67 1 7 31
-6 67 1 8 31
-6 67 1 9 31
-6 67 1 10 31
-6 67 1 11 31
-6 67 1 12 31
-6 67 1 13 31
-6 67 1 14 31
-6 67 1 15 31
-6 67 1 16 31
-6 67 1 17 31
-6 67 1 18 31
-6 67 1 19 31
-6 67 1 20 31
-6 67 1 21 31
-6 67 1 22 31
-6 67 1 23 31
-6 67 1 24 31
-6 67 1 25 31
-6 67 1 26 31
-6 67 1 27 31
-6 67 1 28 31
-6 67 1 29 31
-6 67 1 30 31
-6 67 1 31 31
-6 67 1 32 31
-6 67 1 33 31
-6 67 1 34 31
-6 67 1 35 31
-6 67 1 36 31
-6 67 1 37 31
-6 67 1 38 31
-6 67 1 39 31
-6 67 1 40 31
-6 67 1 41 69
-1 48 1 42 87
-42 87 1 43 26
-43 26 1 44 17
-44 17 1 45 77
-45 77 1 41 69
-44 17 1 41 69
-43 26 1 48 26
-48 26 1 49 17
-49 17 1 50 77
-50 77 1 41 69
-49 17 1 41 69
-48 26 1 53 77
-53 77 1 41 69
-? 4 Pseudonym/Data-Masking/__main___1.py
+2 10 1 3 9
+1 48 1 4 51
+1 48 1 5 12
+5 12 1 6 77
+6 77 1 7 93
+5 12 1 8 26
+8 26 1 9 17
+9 17 1 10 77
+10 77 1 11 69
+9 17 1 11 69
+8 26 1 13 77
+13 77 1 11 69
+1 48 1 15 54
+15 54 1 16 29
+16 29 1 17 77
+17 77 1 11 69
+16 29 1 19 63
+16 29 1 20 31
+15 54 1 21 87
+21 87 1 22 77
+22 77 1 11 69
+1 48 1 24 54
+24 54 1 25 29
+25 29 1 26 77
+26 77 1 11 69
+25 29 1 19 63
+25 29 1 29 31
+24 54 1 30 42
+30 42 1 31 26
+31 26 1 32 77
+32 77 1 11 69
+31 26 1 34 20
+34 20 1 35 31
+34 20 1 36 74
+34 20 1 37 77
+37 77 1 11 69
+24 54 1 39 12
+39 12 1 40 77
+40 77 1 7 93
+39 12 1 42 26
+42 26 1 43 77
+43 77 1 11 69
+42 26 1 45 31
+24 54 1 46 54
+46 54 1 47 29
+47 29 1 48 77
+48 77 1 11 69
+47 29 1 50 39
+47 29 1 51 31
+46 54 1 52 12
+52 12 1 53 77
+53 77 1 7 93
+52 12 1 55 31
+46 54 1 56 42
+56 42 1 57 26
+57 26 1 58 77
+58 77 1 11 69
+57 26 1 60 31
+46 54 1 61 42
+61 42 1 62 26
+62 26 1 63 77
+63 77 1 11 69
+1 48 1 65 103
+65 103 1 66 31
+65 103 1 67 12
+67 12 1 68 77
+68 77 1 7 93
+67 12 1 70 67
+70 67 1 11 69
+65 103 1 72 12
+72 12 1 73 77
+73 77 1 7 93
+72 12 1 75 31
+65 103 1 76 12
+76 12 1 77 77
+77 77 1 7 93
+76 12 1 79 31
+65 103 1 80 46
+80 46 1 81 77
+81 77 1 7 93
+80 46 1 83 26
+83 26 1 84 77
+84 77 1 11 69
+83 26 1 86 31
+80 46 1 87 54
+87 54 1 88 29
+88 29 1 89 77
+89 77 1 11 69
+88 29 1 50 39
+88 29 1 92 31
+87 54 1 93 12
+93 12 1 94 77
+94 77 1 7 93
+93 12 1 96 26
+96 26 1 97 17
+97 17 1 98 77
+98 77 1 11 69
+97 17 1 11 69
+96 26 1 101 31
+96 26 1 102 31
+87 54 1 103 12
+103 12 1 104 77
+104 77 1 7 93
+103 12 1 106 26
+106 26 1 107 17
+107 17 1 108 77
+108 77 1 11 69
+107 17 1 11 69
+106 26 1 111 31
+106 26 1 112 31
+80 46 1 113 42
+113 42 1 114 26
+114 26 1 115 17
+115 17 1 116 77
+116 77 1 11 69
+115 17 1 11 69
+114 26 1 119 77
+119 77 1 11 69
+80 46 1 121 18
+121 18 1 122 77
+122 77 1 7 93
+121 18 1 124 3
+121 18 1 125 26
+125 26 1 126 77
+126 77 1 11 69
+125 26 1 128 77
+128 77 1 11 69
+80 46 1 130 18
+130 18 1 131 77
+131 77 1 7 93
+130 18 1 124 3
+130 18 1 134 20
+134 20 1 135 77
+135 77 1 11 69
+134 20 1 137 76
+134 20 1 138 20
+138 20 1 139 31
+138 20 1 140 94
+138 20 1 141 77
+141 77 1 11 69
+65 103 1 143 12
+143 12 1 144 77
+144 77 1 7 93
+143 12 1 146 20
+146 20 1 147 31
+146 20 1 140 94
+146 20 1 149 20
+149 20 1 150 77
+150 77 1 11 69
+149 20 1 36 74
+149 20 1 153 31
+65 103 1 154 54
+154 54 1 155 29
+155 29 1 156 77
+156 77 1 11 69
+155 29 1 50 39
+155 29 1 159 31
+154 54 1 160 12
+160 12 1 161 77
+161 77 1 7 93
+160 12 1 163 31
+65 103 1 164 54
+164 54 1 165 29
+165 29 1 166 77
+166 77 1 11 69
+165 29 1 168 81
+165 29 1 169 31
+164 54 1 170 18
+170 18 1 171 77
+171 77 1 7 93
+170 18 1 124 3
+170 18 1 174 26
+174 26 1 175 77
+175 77 1 11 69
+174 26 1 177 77
+177 77 1 11 69
+164 54 1 179 12
+179 12 1 180 77
+180 77 1 7 93
+179 12 1 182 26
+182 26 1 183 77
+183 77 1 11 69
+182 26 1 185 77
+185 77 1 11 69
+164 54 1 187 12
+187 12 1 188 95
+188 95 1 189 77
+189 77 1 11 69
+188 95 1 191 77
+191 77 1 11 69
+188 95 1 7 93
+187 12 1 194 77
+194 77 1 11 69
+164 54 1 196 87
+196 87 1 197 77
+197 77 1 11 69
+? 4 Pseudonym/anonymize_6.py
+
+1 48 1 2 10
+2 10 1 3 9
+1 48 1 4 51
+1 48 1 5 12
+5 12 1 6 77
+6 77 1 7 93
+5 12 1 8 26
+8 26 1 9 17
+9 17 1 10 31
+9 17 1 11 69
+8 26 1 12 95
+12 95 1 13 26
+13 26 1 14 17
+14 17 1 15 77
+15 77 1 11 69
+14 17 1 11 69
+12 95 1 18 31
+12 95 1 11 69
+8 26 1 20 95
+20 95 1 21 26
+21 26 1 22 17
+22 17 1 23 77
+23 77 1 11 69
+22 17 1 11 69
+20 95 1 26 31
+20 95 1 11 69
+8 26 1 28 77
+28 77 1 11 69
+8 26 1 30 95
+30 95 1 31 26
+31 26 1 32 17
+32 17 1 33 77
+33 77 1 11 69
+32 17 1 11 69
+30 95 1 36 31
+30 95 1 11 69
+1 48 1 38 18
+38 18 1 39 77
+39 77 1 7 93
+38 18 1 41 3
+38 18 1 42 31
+1 48 1 43 87
+43 87 1 44 77
+44 77 1 11 69
+? 4 Pseudonym/pseudodepseudonimizer_1.py
+
+1 48 1 2 10
+2 10 1 3 9
+2 10 1 4 9
+2 10 1 5 9
+2 10 1 6 9
+1 48 1 7 87
+7 87 1 8 26
+8 26 1 9 17
+9 17 1 10 77
+10 77 1 11 69
+9 17 1 11 69
+8 26 1 13 77
+13 77 1 11 69
+8 26 1 15 77
+15 77 1 11 69
+8 26 1 17 65
+17 65 1 18 77
+18 77 1 11 69
+1 48 1 20 77
+20 77 1 11 69
+? 4 Pseudonym/dataFrameAnonymizer_4.py
 
 1 48 1 2 10
 2 10 1 3 9
-1 48 1 4 42
-4 42 1 5 26
+1 48 1 4 87
+4 87 1 5 26
 5 26 1 6 17
 6 17 1 7 77
 7 77 1 8 69
 6 17 1 8 69
-5 26 1 10 31
-1 48 1 11 12
-11 12 1 12 77
-12 77 1 13 93
-11 12 1 14 67
-14 67 1 8 69
-1 48 1 16 46
-16 46 1 17 77
-17 77 1 13 93
-16 46 1 19 26
-19 26 1 20 77
-20 77 1 8 69
-19 26 1 22 26
-22 26 1 23 77
-23 77 1 8 69
-22 26 1 25 77
-25 77 1 8 69
-16 46 1 27 12
-27 12 1 28 77
-28 77 1 13 93
-27 12 1 30 95
-30 95 1 31 17
-31 17 1 32 77
-32 77 1 8 69
-31 17 1 8 69
-30 95 1 35 77
-35 77 1 8 69
-30 95 1 8 69
-16 46 1 38 42
-38 42 1 39 26
-39 26 1 40 17
-40 17 1 41 77
-41 77 1 8 69
-40 17 1 8 69
-39 26 1 44 26
-44 26 1 45 17
-45 17 1 46 17
-46 17 1 47 77
-47 77 1 8 69
-46 17 1 8 69
-45 17 1 8 69
-44 26 1 51 26
-51 26 1 52 17
-52 17 1 53 77
-53 77 1 8 69
-52 17 1 8 69
-1 48 1 56 12
-56 12 1 57 77
-57 77 1 13 93
-56 12 1 59 67
-59 67 1 8 69
-1 48 1 61 46
-61 46 1 62 77
-62 77 1 13 93
-61 46 1 64 26
-64 26 1 65 77
-65 77 1 8 69
-64 26 1 67 26
-67 26 1 68 77
-68 77 1 8 69
-67 26 1 70 77
-70 77 1 8 69
-61 46 1 72 12
-72 12 1 73 77
-73 77 1 13 93
-72 12 1 75 67
-75 67 1 8 69
-61 46 1 77 46
-77 46 1 78 97
-78 97 1 79 77
-79 77 1 13 93
-78 97 1 81 77
-81 77 1 13 93
-78 97 1 13 93
-77 46 1 84 95
-84 95 1 85 77
-85 77 1 8 69
-84 95 1 87 77
-87 77 1 8 69
-84 95 1 8 69
-77 46 1 90 54
-90 54 1 91 24
-91 24 1 92 83
-91 24 1 93 29
-93 29 1 94 77
-94 77 1 8 69
-93 29 1 96 39
-93 29 1 97 31
-91 24 1 98 29
-98 29 1 99 77
-99 77 1 8 69
-98 29 1 96 39
-98 29 1 102 31
-91 24 1 103 29
-103 29 1 104 77
-104 77 1 8 69
-103 29 1 96 39
-103 29 1 107 31
-90 54 1 108 42
-108 42 1 109 26
-109 26 1 110 17
-110 17 1 111 77
-111 77 1 8 69
-110 17 1 8 69
-109 26 1 114 31
-90 54 1 115 42
-115 42 1 116 26
-116 26 1 117 17
-117 17 1 118 77
-118 77 1 8 69
-117 17 1 8 69
-116 26 1 121 77
-121 77 1 8 69
-61 46 1 123 42
-123 42 1 124 26
-124 26 1 125 17
-125 17 1 126 77
-126 77 1 8 69
-125 17 1 8 69
-124 26 1 129 26
-129 26 1 130 77
-130 77 1 8 69
-129 26 1 132 26
-132 26 1 133 17
-133 17 1 134 31
-133 17 1 8 69
-132 26 1 136 77
-136 77 1 8 69
-1 48 1 138 87
-138 87 1 139 26
-139 26 1 140 17
-140 17 1 141 77
-141 77 1 8 69
-140 17 1 8 69
-139 26 1 144 77
-144 77 1 8 69
-? 4 Pseudonym/Data-Masking/redact_names_1.py
+5 26 1 10 26
+10 26 1 11 17
+11 17 1 12 77
+12 77 1 8 69
+11 17 1 8 69
+10 26 1 15 77
+15 77 1 8 69
+? 4 Pseudonym/Data-Masking/redact_numerics_2.py
 

File diff suppressed because it is too large
+ 2062 - 1939
data/traindata/test/Share.txt


File diff suppressed because it is too large
+ 641 - 755
data/traindata/test/kafka.txt


+ 137 - 118
data/traindata/test/truncate.txt

@@ -193,123 +193,142 @@
 
 1 48 1 2 10
 2 10 1 3 9
-2 10 1 4 9
-1 48 1 5 42
-5 42 1 6 31
-1 48 1 7 12
-7 12 1 8 77
-8 77 1 9 93
-7 12 1 10 26
-10 26 1 11 77
-11 77 1 12 69
-10 26 1 13 77
-13 77 1 12 69
-10 26 1 15 26
-15 26 1 16 17
-16 17 1 17 77
-17 77 1 12 69
-16 17 1 12 69
-15 26 1 20 31
-1 48 1 21 12
+1 48 1 4 12
+4 12 1 5 77
+5 77 1 6 93
+4 12 1 7 26
+7 26 1 8 17
+8 17 1 9 77
+9 77 1 10 69
+8 17 1 10 69
+1 48 1 12 46
+12 46 1 13 97
+13 97 1 14 77
+14 77 1 6 93
+13 97 1 16 77
+16 77 1 6 93
+13 97 1 6 93
+12 46 1 19 77
+19 77 1 10 69
+12 46 1 21 12
 21 12 1 22 77
-22 77 1 9 93
-21 12 1 24 95
-24 95 1 25 77
-25 77 1 12 69
-24 95 1 27 31
-24 95 1 12 69
-1 48 1 29 12
-29 12 1 30 77
-30 77 1 9 93
-29 12 1 32 95
-32 95 1 33 77
-33 77 1 12 69
-32 95 1 35 31
-32 95 1 12 69
-1 48 1 37 54
-37 54 1 38 29
-38 29 1 39 26
-39 26 1 40 77
-40 77 1 12 69
-39 26 1 42 77
-42 77 1 12 69
-38 29 1 44 72
-38 29 1 45 77
-45 77 1 12 69
-37 54 1 47 87
-47 87 1 48 77
-48 77 1 12 69
-1 48 1 50 12
-50 12 1 51 77
-51 77 1 9 93
-50 12 1 53 26
-53 26 1 54 17
-54 17 1 55 95
-55 95 1 56 77
-56 77 1 12 69
-55 95 1 58 91
-58 91 1 59 77
-59 77 1 12 69
-58 91 1 61 77
-61 77 1 12 69
-55 95 1 12 69
-54 17 1 12 69
-53 26 1 65 31
-1 48 1 66 54
-66 54 1 67 77
-67 77 1 12 69
-66 54 1 69 46
-69 46 1 70 77
-70 77 1 9 93
-69 46 1 72 26
-72 26 1 73 77
-73 77 1 12 69
-72 26 1 75 77
-75 77 1 12 69
-72 26 1 77 26
-77 26 1 78 77
-78 77 1 12 69
-77 26 1 80 77
-80 77 1 12 69
-69 46 1 82 54
-82 54 1 83 24
-83 24 1 84 6
-83 24 1 85 29
-85 29 1 86 95
-86 95 1 87 77
-87 77 1 12 69
-86 95 1 89 77
-89 77 1 12 69
-86 95 1 12 69
-85 29 1 92 39
-85 29 1 93 31
-83 24 1 94 29
-94 29 1 95 77
-95 77 1 12 69
-94 29 1 97 71
-94 29 1 98 77
-98 77 1 12 69
-82 54 1 100 12
-100 12 1 101 77
-101 77 1 9 93
-100 12 1 103 77
-103 77 1 12 69
-66 54 1 105 12
-105 12 1 106 77
-106 77 1 9 93
-105 12 1 108 20
-108 20 1 109 95
-109 95 1 110 77
-110 77 1 12 69
-109 95 1 112 91
-112 91 1 113 31
-112 91 1 114 77
-114 77 1 12 69
-109 95 1 12 69
-108 20 1 117 3
-108 20 1 118 31
-1 48 1 119 87
-119 87 1 120 77
-120 77 1 12 69
-? 6 truncate/addup/smartTruncate_1.py
+22 77 1 6 93
+21 12 1 24 26
+24 26 1 25 17
+25 17 1 26 77
+26 77 1 10 69
+25 17 1 10 69
+24 26 1 29 77
+29 77 1 10 69
+24 26 1 31 77
+31 77 1 10 69
+24 26 1 33 77
+33 77 1 10 69
+1 48 1 35 12
+35 12 1 36 77
+36 77 1 6 93
+35 12 1 38 26
+38 26 1 39 17
+39 17 1 40 77
+40 77 1 10 69
+39 17 1 10 69
+38 26 1 43 31
+1 48 1 44 54
+44 54 1 45 29
+45 29 1 46 26
+46 26 1 47 77
+47 77 1 10 69
+46 26 1 49 77
+49 77 1 10 69
+45 29 1 51 52
+45 29 1 52 31
+44 54 1 53 12
+53 12 1 54 77
+54 77 1 6 93
+53 12 1 56 20
+56 20 1 57 95
+57 95 1 58 77
+58 77 1 10 69
+57 95 1 60 31
+57 95 1 10 69
+56 20 1 62 3
+56 20 1 63 31
+1 48 1 64 46
+64 46 1 65 77
+65 77 1 6 93
+64 46 1 67 26
+67 26 1 68 17
+68 17 1 69 77
+69 77 1 10 69
+68 17 1 10 69
+67 26 1 72 31
+64 46 1 73 54
+73 54 1 74 29
+74 29 1 75 77
+75 77 1 10 69
+74 29 1 77 58
+74 29 1 78 77
+78 77 1 10 69
+73 54 1 80 12
+80 12 1 81 77
+81 77 1 6 93
+80 12 1 83 26
+83 26 1 84 17
+84 17 1 85 77
+85 77 1 10 69
+84 17 1 10 69
+83 26 1 88 77
+88 77 1 10 69
+83 26 1 90 31
+83 26 1 91 77
+91 77 1 10 69
+1 48 1 93 12
+93 12 1 94 77
+94 77 1 6 93
+93 12 1 96 26
+96 26 1 97 17
+97 17 1 98 77
+98 77 1 10 69
+97 17 1 10 69
+96 26 1 101 95
+101 95 1 102 77
+102 77 1 10 69
+101 95 1 104 91
+104 91 1 105 31
+104 91 1 106 77
+106 77 1 10 69
+101 95 1 10 69
+96 26 1 109 77
+109 77 1 10 69
+1 48 1 111 54
+111 54 1 112 29
+112 29 1 113 77
+113 77 1 10 69
+112 29 1 115 81
+112 29 1 116 101
+116 101 1 117 102
+116 101 1 118 31
+111 54 1 119 12
+119 12 1 120 77
+120 77 1 6 93
+119 12 1 122 95
+122 95 1 123 77
+123 77 1 10 69
+122 95 1 125 91
+125 91 1 126 77
+126 77 1 10 69
+122 95 1 10 69
+1 48 1 129 87
+129 87 1 130 20
+130 20 1 131 26
+131 26 1 132 17
+132 17 1 133 26
+133 26 1 134 17
+134 17 1 135 77
+135 77 1 10 69
+134 17 1 10 69
+132 17 1 10 69
+130 20 1 62 3
+130 20 1 140 31
+? 6 truncate/trunc_1.py
 

File diff suppressed because it is too large
+ 147 - 3311
data/traindata/test/visualize.txt


File diff suppressed because it is too large
+ 8902 - 9842
data/traindata/train/Directory.txt


File diff suppressed because it is too large
+ 376 - 775
data/traindata/train/Hash.txt


File diff suppressed because it is too large
+ 1037 - 855
data/traindata/train/Pseudonym.txt


File diff suppressed because it is too large
+ 1915 - 10377
data/traindata/train/Share.txt


File diff suppressed because it is too large
+ 1129 - 2606
data/traindata/train/kafka.txt


+ 265 - 284
data/traindata/train/truncate.txt

@@ -2,124 +2,154 @@
 2 10 1 3 9
 2 10 1 4 9
 2 10 1 5 9
-2 10 1 6 9
-2 10 1 7 31
-2 10 1 8 31
-2 10 1 9 31
-1 48 1 10 42
-10 42 1 11 31
-1 48 1 12 12
-12 12 1 13 77
-13 77 1 14 93
-12 12 1 15 101
-15 101 1 16 102
-15 101 1 17 31
-1 48 1 18 12
-18 12 1 19 77
-19 77 1 14 93
-18 12 1 21 31
-1 48 1 22 54
-22 54 1 23 29
-23 29 1 24 77
-24 77 1 25 69
-23 29 1 26 71
-23 29 1 27 77
-27 77 1 25 69
-22 54 1 29 86
-29 86 1 30 26
-30 26 1 31 77
-31 77 1 25 69
-30 26 1 33 77
-33 77 1 25 69
-1 48 1 35 54
-35 54 1 36 77
-36 77 1 25 69
-35 54 1 38 12
-38 12 1 39 77
-39 77 1 14 93
-38 12 1 41 31
-35 54 1 42 12
-42 12 1 43 77
-43 77 1 14 93
-42 12 1 45 31
-1 48 1 46 12
-46 12 1 47 77
-47 77 1 14 93
-46 12 1 49 26
-49 26 1 50 77
-50 77 1 25 69
-49 26 1 52 77
-52 77 1 25 69
-1 48 1 54 54
-54 54 1 55 29
-55 29 1 56 77
-56 77 1 25 69
-55 29 1 58 72
-55 29 1 59 77
-59 77 1 25 69
-54 54 1 61 87
-61 87 1 62 20
-62 20 1 63 77
-63 77 1 25 69
-62 20 1 65 3
-62 20 1 66 77
-66 77 1 25 69
-54 54 1 68 96
-68 96 1 69 12
-69 12 1 70 77
-70 77 1 14 93
-69 12 1 72 26
-72 26 1 73 17
-73 17 1 74 77
-74 77 1 25 69
-73 17 1 25 69
-72 26 1 77 31
-72 26 1 78 77
-78 77 1 25 69
-72 26 1 80 77
-80 77 1 25 69
-68 96 1 82 41
-82 41 1 83 77
-83 77 1 25 69
-82 41 1 85 12
-85 12 1 86 77
-86 77 1 14 93
-85 12 1 88 26
-88 26 1 89 17
-89 17 1 90 77
-90 77 1 25 69
-89 17 1 25 69
-88 26 1 93 31
-88 26 1 94 77
-94 77 1 25 69
-88 26 1 96 77
-96 77 1 25 69
-82 41 1 98 54
-98 54 1 99 29
-99 29 1 100 77
-100 77 1 25 69
-99 29 1 102 39
-99 29 1 103 77
-103 77 1 25 69
-98 54 1 105 12
+2 10 1 6 31
+1 48 1 7 87
+7 87 1 8 55
+8 55 1 9 29
+9 29 1 10 26
+10 26 1 11 77
+11 77 1 12 69
+10 26 1 13 77
+13 77 1 12 69
+9 29 1 15 52
+9 29 1 16 77
+16 77 1 12 69
+8 55 1 18 20
+18 20 1 19 95
+19 95 1 20 77
+20 77 1 12 69
+19 95 1 22 91
+22 91 1 23 77
+23 77 1 12 69
+19 95 1 12 69
+18 20 1 26 3
+18 20 1 27 77
+27 77 1 12 69
+8 55 1 29 77
+29 77 1 12 69
+? 6 truncate/truncate_2_1.py
+
+1 48 1 2 10
+2 10 1 3 9
+2 10 1 4 9
+1 48 1 5 42
+5 42 1 6 31
+1 48 1 7 12
+7 12 1 8 77
+8 77 1 9 93
+7 12 1 10 26
+10 26 1 11 77
+11 77 1 12 69
+10 26 1 13 77
+13 77 1 12 69
+10 26 1 15 26
+15 26 1 16 17
+16 17 1 17 77
+17 77 1 12 69
+16 17 1 12 69
+15 26 1 20 31
+1 48 1 21 12
+21 12 1 22 77
+22 77 1 9 93
+21 12 1 24 95
+24 95 1 25 77
+25 77 1 12 69
+24 95 1 27 31
+24 95 1 12 69
+1 48 1 29 12
+29 12 1 30 77
+30 77 1 9 93
+29 12 1 32 95
+32 95 1 33 77
+33 77 1 12 69
+32 95 1 35 31
+32 95 1 12 69
+1 48 1 37 54
+37 54 1 38 29
+38 29 1 39 26
+39 26 1 40 77
+40 77 1 12 69
+39 26 1 42 77
+42 77 1 12 69
+38 29 1 44 72
+38 29 1 45 77
+45 77 1 12 69
+37 54 1 47 87
+47 87 1 48 77
+48 77 1 12 69
+1 48 1 50 12
+50 12 1 51 77
+51 77 1 9 93
+50 12 1 53 26
+53 26 1 54 17
+54 17 1 55 95
+55 95 1 56 77
+56 77 1 12 69
+55 95 1 58 91
+58 91 1 59 77
+59 77 1 12 69
+58 91 1 61 77
+61 77 1 12 69
+55 95 1 12 69
+54 17 1 12 69
+53 26 1 65 31
+1 48 1 66 54
+66 54 1 67 77
+67 77 1 12 69
+66 54 1 69 46
+69 46 1 70 77
+70 77 1 9 93
+69 46 1 72 26
+72 26 1 73 77
+73 77 1 12 69
+72 26 1 75 77
+75 77 1 12 69
+72 26 1 77 26
+77 26 1 78 77
+78 77 1 12 69
+77 26 1 80 77
+80 77 1 12 69
+69 46 1 82 54
+82 54 1 83 24
+83 24 1 84 6
+83 24 1 85 29
+85 29 1 86 95
+86 95 1 87 77
+87 77 1 12 69
+86 95 1 89 77
+89 77 1 12 69
+86 95 1 12 69
+85 29 1 92 39
+85 29 1 93 31
+83 24 1 94 29
+94 29 1 95 77
+95 77 1 12 69
+94 29 1 97 71
+94 29 1 98 77
+98 77 1 12 69
+82 54 1 100 12
+100 12 1 101 77
+101 77 1 9 93
+100 12 1 103 77
+103 77 1 12 69
+66 54 1 105 12
 105 12 1 106 77
-106 77 1 14 93
-105 12 1 108 77
-108 77 1 25 69
-54 54 1 110 87
-110 87 1 111 20
-111 20 1 112 95
-112 95 1 113 77
-113 77 1 25 69
-112 95 1 115 91
-115 91 1 116 31
-115 91 1 117 77
-117 77 1 25 69
-112 95 1 25 69
-111 20 1 65 3
-111 20 1 121 77
-121 77 1 25 69
-? 6 truncate/truncate_1_1.py
+106 77 1 9 93
+105 12 1 108 20
+108 20 1 109 95
+109 95 1 110 77
+110 77 1 12 69
+109 95 1 112 91
+112 91 1 113 31
+112 91 1 114 77
+114 77 1 12 69
+109 95 1 12 69
+108 20 1 117 3
+108 20 1 118 31
+1 48 1 119 87
+119 87 1 120 77
+120 77 1 12 69
+? 6 truncate/addup/smartTruncate_1.py
 
 1 48 1 2 10
 2 10 1 3 9
@@ -306,175 +336,126 @@
 
 1 48 1 2 10
 2 10 1 3 9
-1 48 1 4 12
-4 12 1 5 77
-5 77 1 6 93
-4 12 1 7 26
-7 26 1 8 17
-8 17 1 9 77
-9 77 1 10 69
-8 17 1 10 69
-1 48 1 12 46
-12 46 1 13 97
-13 97 1 14 77
-14 77 1 6 93
-13 97 1 16 77
-16 77 1 6 93
-13 97 1 6 93
-12 46 1 19 77
-19 77 1 10 69
-12 46 1 21 12
-21 12 1 22 77
-22 77 1 6 93
-21 12 1 24 26
-24 26 1 25 17
-25 17 1 26 77
-26 77 1 10 69
-25 17 1 10 69
-24 26 1 29 77
-29 77 1 10 69
-24 26 1 31 77
-31 77 1 10 69
-24 26 1 33 77
-33 77 1 10 69
-1 48 1 35 12
-35 12 1 36 77
-36 77 1 6 93
-35 12 1 38 26
-38 26 1 39 17
-39 17 1 40 77
-40 77 1 10 69
-39 17 1 10 69
-38 26 1 43 31
-1 48 1 44 54
-44 54 1 45 29
-45 29 1 46 26
-46 26 1 47 77
-47 77 1 10 69
-46 26 1 49 77
-49 77 1 10 69
-45 29 1 51 52
-45 29 1 52 31
-44 54 1 53 12
-53 12 1 54 77
-54 77 1 6 93
-53 12 1 56 20
-56 20 1 57 95
-57 95 1 58 77
-58 77 1 10 69
-57 95 1 60 31
-57 95 1 10 69
-56 20 1 62 3
-56 20 1 63 31
-1 48 1 64 46
-64 46 1 65 77
-65 77 1 6 93
-64 46 1 67 26
-67 26 1 68 17
-68 17 1 69 77
-69 77 1 10 69
-68 17 1 10 69
-67 26 1 72 31
-64 46 1 73 54
-73 54 1 74 29
-74 29 1 75 77
-75 77 1 10 69
-74 29 1 77 58
-74 29 1 78 77
-78 77 1 10 69
-73 54 1 80 12
-80 12 1 81 77
-81 77 1 6 93
-80 12 1 83 26
-83 26 1 84 17
-84 17 1 85 77
-85 77 1 10 69
-84 17 1 10 69
-83 26 1 88 77
-88 77 1 10 69
-83 26 1 90 31
-83 26 1 91 77
-91 77 1 10 69
-1 48 1 93 12
-93 12 1 94 77
-94 77 1 6 93
-93 12 1 96 26
-96 26 1 97 17
-97 17 1 98 77
-98 77 1 10 69
-97 17 1 10 69
-96 26 1 101 95
-101 95 1 102 77
-102 77 1 10 69
-101 95 1 104 91
-104 91 1 105 31
-104 91 1 106 77
-106 77 1 10 69
-101 95 1 10 69
-96 26 1 109 77
-109 77 1 10 69
-1 48 1 111 54
-111 54 1 112 29
-112 29 1 113 77
-113 77 1 10 69
-112 29 1 115 81
-112 29 1 116 101
-116 101 1 117 102
-116 101 1 118 31
-111 54 1 119 12
-119 12 1 120 77
-120 77 1 6 93
-119 12 1 122 95
-122 95 1 123 77
-123 77 1 10 69
-122 95 1 125 91
-125 91 1 126 77
-126 77 1 10 69
-122 95 1 10 69
-1 48 1 129 87
-129 87 1 130 20
-130 20 1 131 26
-131 26 1 132 17
-132 17 1 133 26
-133 26 1 134 17
-134 17 1 135 77
-135 77 1 10 69
-134 17 1 10 69
-132 17 1 10 69
-130 20 1 62 3
-130 20 1 140 31
-? 6 truncate/trunc_1.py
-
-1 48 1 2 10
-2 10 1 3 9
 2 10 1 4 9
 2 10 1 5 9
-2 10 1 6 31
-1 48 1 7 87
-7 87 1 8 55
-8 55 1 9 29
-9 29 1 10 26
-10 26 1 11 77
-11 77 1 12 69
-10 26 1 13 77
-13 77 1 12 69
-9 29 1 15 52
-9 29 1 16 77
-16 77 1 12 69
-8 55 1 18 20
-18 20 1 19 95
-19 95 1 20 77
-20 77 1 12 69
-19 95 1 22 91
-22 91 1 23 77
-23 77 1 12 69
-19 95 1 12 69
-18 20 1 26 3
-18 20 1 27 77
-27 77 1 12 69
-8 55 1 29 77
-29 77 1 12 69
-? 6 truncate/truncate_2_1.py
+2 10 1 6 9
+2 10 1 7 31
+2 10 1 8 31
+2 10 1 9 31
+1 48 1 10 42
+10 42 1 11 31
+1 48 1 12 12
+12 12 1 13 77
+13 77 1 14 93
+12 12 1 15 101
+15 101 1 16 102
+15 101 1 17 31
+1 48 1 18 12
+18 12 1 19 77
+19 77 1 14 93
+18 12 1 21 31
+1 48 1 22 54
+22 54 1 23 29
+23 29 1 24 77
+24 77 1 25 69
+23 29 1 26 71
+23 29 1 27 77
+27 77 1 25 69
+22 54 1 29 86
+29 86 1 30 26
+30 26 1 31 77
+31 77 1 25 69
+30 26 1 33 77
+33 77 1 25 69
+1 48 1 35 54
+35 54 1 36 77
+36 77 1 25 69
+35 54 1 38 12
+38 12 1 39 77
+39 77 1 14 93
+38 12 1 41 31
+35 54 1 42 12
+42 12 1 43 77
+43 77 1 14 93
+42 12 1 45 31
+1 48 1 46 12
+46 12 1 47 77
+47 77 1 14 93
+46 12 1 49 26
+49 26 1 50 77
+50 77 1 25 69
+49 26 1 52 77
+52 77 1 25 69
+1 48 1 54 54
+54 54 1 55 29
+55 29 1 56 77
+56 77 1 25 69
+55 29 1 58 72
+55 29 1 59 77
+59 77 1 25 69
+54 54 1 61 87
+61 87 1 62 20
+62 20 1 63 77
+63 77 1 25 69
+62 20 1 65 3
+62 20 1 66 77
+66 77 1 25 69
+54 54 1 68 96
+68 96 1 69 12
+69 12 1 70 77
+70 77 1 14 93
+69 12 1 72 26
+72 26 1 73 17
+73 17 1 74 77
+74 77 1 25 69
+73 17 1 25 69
+72 26 1 77 31
+72 26 1 78 77
+78 77 1 25 69
+72 26 1 80 77
+80 77 1 25 69
+68 96 1 82 41
+82 41 1 83 77
+83 77 1 25 69
+82 41 1 85 12
+85 12 1 86 77
+86 77 1 14 93
+85 12 1 88 26
+88 26 1 89 17
+89 17 1 90 77
+90 77 1 25 69
+89 17 1 25 69
+88 26 1 93 31
+88 26 1 94 77
+94 77 1 25 69
+88 26 1 96 77
+96 77 1 25 69
+82 41 1 98 54
+98 54 1 99 29
+99 29 1 100 77
+100 77 1 25 69
+99 29 1 102 39
+99 29 1 103 77
+103 77 1 25 69
+98 54 1 105 12
+105 12 1 106 77
+106 77 1 14 93
+105 12 1 108 77
+108 77 1 25 69
+54 54 1 110 87
+110 87 1 111 20
+111 20 1 112 95
+112 95 1 113 77
+113 77 1 25 69
+112 95 1 115 91
+115 91 1 116 31
+115 91 1 117 77
+117 77 1 25 69
+112 95 1 25 69
+111 20 1 65 3
+111 20 1 121 77
+121 77 1 25 69
+? 6 truncate/truncate_1_1.py
 
 1 48 1 2 10
 2 10 1 3 9

File diff suppressed because it is too large
+ 21725 - 30775
data/traindata/train/visualize.txt


+ 2 - 0
graph/astgraph/AstGraph.py

@@ -1,5 +1,6 @@
 import ast
 import os
+import random
 
 from graph.common.nodetype import *
 from graph.common.graphtype import *
@@ -64,6 +65,7 @@ def gen_graph_to_txt():
         path_out_train = CUR_PATH + "/../../data/traindata/train/" + str(kind) + ".txt"
         path_out_test = CUR_PATH + "/../../data/traindata/test/" + str(kind) + ".txt"
         file_path_list = walk_files(path_ori + "/" + kind)
+        random.shuffle(file_path_list)
         lens = int(len(file_path_list) / 4 * 3)
         with open(path_out_train, 'w') as file:
             for file_path in file_path_list[:lens]:

Some files were not shown because too many files changed in this diff