
Training update

wendaojidian 2 years ago
Parent
Current commit
628ea40602

+ 4 - 1
.idea/GnnForPrivacyScan.iml

@@ -4,7 +4,10 @@
     <content url="file://$MODULE_DIR$">
       <sourceFolder url="file://$MODULE_DIR$" isTestSource="false" />
     </content>
-    <orderEntry type="jdk" jdkName="Python 3.7 (py36) (2)" jdkType="Python SDK" />
+    <orderEntry type="jdk" jdkName="Python 3.7 (py36)" jdkType="Python SDK" />
     <orderEntry type="sourceFolder" forTests="false" />
   </component>
+  <component name="PackageRequirementsSettings">
+    <option name="requirementsPath" value="" />
+  </component>
 </module>

+ 1 - 1
.idea/misc.xml

@@ -1,6 +1,6 @@
 <?xml version="1.0" encoding="UTF-8"?>
 <project version="4">
-  <component name="ProjectRootManager" version="2" project-jdk-name="Python 3.7 (py36) (2)" project-jdk-type="Python SDK" />
+  <component name="ProjectRootManager" version="2" project-jdk-name="Python 3.7 (py36)" project-jdk-type="Python SDK" />
   <component name="PyCharmProfessionalAdvertiser">
     <option name="shown" value="true" />
   </component>

+ 0 - 24
.idea/vcs.xml

@@ -2,29 +2,5 @@
 <project version="4">
   <component name="VcsDirectoryMappings">
     <mapping directory="$PROJECT_DIR$" vcs="Git" />
-    <mapping directory="$PROJECT_DIR$/data/purposeCombined/Azure/AzureStorage" vcs="Git" />
-    <mapping directory="$PROJECT_DIR$/data/purposeCombined/Azure/azure-multiapi-storage-python" vcs="Git" />
-    <mapping directory="$PROJECT_DIR$/data/purposeCombined/Azure/python-text-classification" vcs="Git" />
-    <mapping directory="$PROJECT_DIR$/data/purposeCombined/BI/BusinessIntelligence-Kaggle" vcs="Git" />
-    <mapping directory="$PROJECT_DIR$/data/purposeCombined/Hash/Encryption_And_Hashing" vcs="Git" />
-    <mapping directory="$PROJECT_DIR$/data/purposeCombined/ML/ML-In-Action" vcs="Git" />
-    <mapping directory="$PROJECT_DIR$/data/purposeCombined/ML/Machine-Learining-Security" vcs="Git" />
-    <mapping directory="$PROJECT_DIR$/data/purposeCombined/ML/Machine-Learning" vcs="Git" />
-    <mapping directory="$PROJECT_DIR$/data/purposeCombined/ML/Machine_Learning_and_Having_It_Deep_and_Structured" vcs="Git" />
-    <mapping directory="$PROJECT_DIR$/data/purposeCombined/NATS/NatsExample" vcs="Git" />
-    <mapping directory="$PROJECT_DIR$/data/purposeCombined/NATS/asyncio-nats-examples" vcs="Git" />
-    <mapping directory="$PROJECT_DIR$/data/purposeCombined/Pseudonym/Data-Masking" vcs="Git" />
-    <mapping directory="$PROJECT_DIR$/data/purposeCombined/S3/NatsExample" vcs="Git" />
-    <mapping directory="$PROJECT_DIR$/data/purposeCombined/S3/odoo-s3-storage" vcs="Git" />
-    <mapping directory="$PROJECT_DIR$/data/purposeCombined/S3/s3-concat" vcs="Git" />
-    <mapping directory="$PROJECT_DIR$/data/purposeCombined/archive/auto-archiver" vcs="Git" />
-    <mapping directory="$PROJECT_DIR$/data/purposeCombined/kafka/Calories-Alert-Kafka" vcs="Git" />
-    <mapping directory="$PROJECT_DIR$/data/purposeCombined/kafka/MessageCorps" vcs="Git" />
-    <mapping directory="$PROJECT_DIR$/data/purposeCombined/kafka/ai-project-fraud-detection" vcs="Git" />
-    <mapping directory="$PROJECT_DIR$/data/purposeCombined/kafka/kafka-fraud-detector" vcs="Git" />
-    <mapping directory="$PROJECT_DIR$/data/purposeCombined/kafka/kafkaesk" vcs="Git" />
-    <mapping directory="$PROJECT_DIR$/data/purposeCombined/kafka/scrapy-kafka" vcs="Git" />
-    <mapping directory="$PROJECT_DIR$/data/purposeCombined/kafka/tail2kafka" vcs="Git" />
-    <mapping directory="$PROJECT_DIR$/data/purposeCombined/visualize/Visualization-of-popular-algorithms-in-Python" vcs="Git" />
   </component>
 </project>

+ 18 - 6
dataloader/dataset.py

@@ -13,7 +13,6 @@ def load_from_directory(path):
     for file_name in os.listdir(path):
         with open(path + "/" + file_name, 'r') as file:
             for line in file:
-                print(line)
                 if len(line.strip()) == 0:
                     node_id_data_list.append([node_id, graph_type])
                     node_type_data_list.append([node_type, graph_type])
@@ -102,15 +101,28 @@ class Dataset:
     Load bAbI tasks for GGNN
     """
     def __init__(self, path, is_train):
-        data_id, data_type = load_from_directory(path)
+        data_id = list()
+        data_type = list()
+        train_data_id, train_data_type = load_from_directory(path + "/train")
+        test_data_id, test_data_type = load_from_directory(path + "/test")
+
+        data_id.extend(train_data_id)
+        data_id.extend(test_data_id)
+        data_type.extend(train_data_type)
+        data_type.extend(test_data_type)
+
         self.n_edge_types = find_max_edge_id(data_id)
         max_node_id = find_max_node_id(data_id)
         max_node_type = find_max_node_id(data_type)
 
         self.n_node_by_id = max_node_id
         self.n_node_by_type = max_node_type
-        self.node_by_id = convert_program_data(data_id, 1, self.n_node_by_id)
-        self.node_by_type = convert_program_data(data_type, 1, self.n_node_by_type)
+        if is_train:
+            self.node_by_id = convert_program_data(train_data_id, 1, self.n_node_by_id)
+            self.node_by_type = convert_program_data(train_data_type, 1, self.n_node_by_type)
+        else:
+            self.node_by_id = convert_program_data(test_data_id, 1, self.n_node_by_id)
+            self.node_by_type = convert_program_data(test_data_type, 1, self.n_node_by_type)
 
     def __getitem__(self, index):
         am = create_adjacency_matrix(self.node_by_id[index][0], self.n_node_by_id, self.n_edge_types)
@@ -127,6 +139,6 @@ if __name__ == '__main__':
     #     "/Users/liufan/program/PYTHON/sap2nd/GnnForPrivacyScan/data/traindata/train/Directory.txt")
     # a = 5
     bi = Dataset(
-        "I:\Program\Python\sap\GnnForPrivacyScan\data\\traindata\\train", True)
-    for data in bi:
+        "I:\Program\Python\sap\GnnForPrivacyScan\data\\traindata", True)
+    for d in bi:
         a = 5

+ 0 - 0
logs/run-001/events.out.tfevents.1665307264.DESKTOP-CA52H9H → logsbk/run-001/events.out.tfevents.1665307264.DESKTOP-CA52H9H


+ 0 - 0
logs/run-002/events.out.tfevents.1665308717.DESKTOP-CA52H9H → logsbk/run-002/events.out.tfevents.1665308717.DESKTOP-CA52H9H


+ 0 - 1
train/model.py

@@ -159,6 +159,5 @@ class GGNN(nn.Module):
         # print(output.shape)
         if self.is_training_ggnn == True:
             output = self.class_prediction(output)
-        # print(output.shape)
 
         return output

+ 9 - 4
train/test.py

@@ -2,7 +2,7 @@ import torch
 from torch.autograd import Variable
 from shutil import copyfile
 from sklearn.metrics import classification_report, confusion_matrix, accuracy_score
-
+from torchsummary import summary
 
 
 def test(dataloader, net, criterion, optimizer, opt):
@@ -12,11 +12,14 @@ def test(dataloader, net, criterion, optimizer, opt):
 
     all_targets = []
     all_predicted = []
+    print("len", len(dataloader))
 
     for i, (adj_matrix, embedding_matrix, target) in enumerate(dataloader, 0):
         # padding = torch.zeros(len(annotation), opt.n_node, opt.state_dim - opt.annotation_dim).double()
         # init_input = torch.cat((annotation, padding), 2)
         # init_input = torch.zeros(len(adj_matrix), opt.n_node, opt.state_dim).double()
+        print(adj_matrix.shape)
+        print("target.shape", target.shape)
         init_input = embedding_matrix
         if opt.cuda:
             init_input = init_input.cuda()
@@ -28,14 +31,16 @@ def test(dataloader, net, criterion, optimizer, opt):
         adj_matrix = Variable(adj_matrix)
         # annotation = Variable(annotation)
         target = Variable(target)
-        # print(target)
+        print("init_input_shape", init_input.shape)
+        print("target", target)
+        # summary(net, init_input.shape, batch_size=5)
         output = net(init_input, adj_matrix)
-        # print(output)
+        print("output", output)
         # test_loss += criterion(output, target).data[0]
         test_loss += criterion(output, target).item()
 
         pred = output.data.max(1, keepdim=True)[1]
-        # print(pred)
+        print("pred", pred)
 
         all_predicted.extend(pred.data.view_as(target).cpu().numpy())
         all_targets.extend(target.cpu().numpy())

+ 6 - 3
train/train.py

@@ -2,6 +2,8 @@ import torch
 from torch.autograd import Variable
 from shutil import copyfile
 
+from torchsummary import summary
+
 from dataloader.dataloader import PrivacyDataloader
 from dataloader.dataset import Dataset
 
@@ -18,7 +20,7 @@ def train(epoch, dataloader, net, criterion, optimizer, opt, writer):
 
         # init_input = torch.from_numpy(embedding_matrix).double()
         init_input = embedding_matrix
-        # print("input_shape", init_input.shape)
+        print("input_shape", init_input.shape)
         # print(init_input)
         if opt.cuda:
             init_input = init_input.cuda()
@@ -30,11 +32,12 @@ def train(epoch, dataloader, net, criterion, optimizer, opt, writer):
         adj_matrix = Variable(adj_matrix)
         # annotation = Variable(annotation)
         target = Variable(target)
+        # print(adj_matrix.shape)
         output = net(init_input, adj_matrix)
         # print("ouput_shape", output.shape)
         # print("target_shape", target.shape)
-        # print(output)
-        # print(target)
+        print("output", output)
+        print(target)
         loss = criterion(output, target)
         loss.backward()
         optimizer.step()
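
Both train.py and test.py still wrap tensors in torch.autograd.Variable. Since PyTorch 0.4 that wrapper is a no-op (tensors carry autograd state themselves), so the loop body could be written without it. A sketch using the same names as the diff above:

    # Equivalent loop body without the deprecated Variable wrapper.
    optimizer.zero_grad()  # not shown in the excerpt, but required each step
    init_input = embedding_matrix
    if opt.cuda:
        init_input = init_input.cuda()
        adj_matrix = adj_matrix.cuda()
        target = target.cuda()
    output = net(init_input, adj_matrix)  # tensors track gradients directly
    loss = criterion(output, target)
    loss.backward()
    optimizer.step()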

+ 27 - 12
traingnn.py

@@ -6,6 +6,7 @@ import torch
 import torch.nn as nn
 import torch.optim as optim
 from tensorboardX import SummaryWriter
+from torchsummary import summary
 
 from dataloader.dataloader import PrivacyDataloader
 from dataloader.dataset import Dataset
@@ -15,17 +16,17 @@ from train.train import train
 
 parser = argparse.ArgumentParser()
 parser.add_argument('--workers', type=int, help='number of data loading workers', default=2)
-parser.add_argument('--train_batch_size', type=int, default=5, help='input batch size')
-parser.add_argument('--test_batch_size', type=int, default=5, help='input batch size')
+parser.add_argument('--train_batch_size', type=int, default=1, help='input batch size')
+parser.add_argument('--test_batch_size', type=int, default=1, help='input batch size')
 parser.add_argument('--state_dim', type=int, default=106, help='GGNN hidden state size')
 parser.add_argument('--n_steps', type=int, default=10, help='number of GGNN propagation steps')
 parser.add_argument('--niter', type=int, default=10, help='number of epochs to train for')
-parser.add_argument('--lr', type=float, default=0.01, help='learning rate')
+parser.add_argument('--lr', type=float, default=0.0005, help='learning rate')
 parser.add_argument('--cuda', type=bool, default=True, help='enables cuda')
 parser.add_argument('--verbal', type=bool, default=True, help='print training info or not')
 parser.add_argument('--manualSeed', type=int, help='manual seed')
 parser.add_argument('--n_classes', type=int, default=7, help='number of output classes')
-parser.add_argument('--directory', default="data/traindata/train", help='program data')
+parser.add_argument('--directory', default="data/traindata", help='program data')
 parser.add_argument('--model_path', default="model/model.ckpt", help='path to save the model')
 parser.add_argument('--n_hidden', type=int, default=50, help='number of hidden layers')
 parser.add_argument('--size_vocabulary', type=int, default=108, help='maximum number of node types')
@@ -56,14 +57,13 @@ opt Namespace(workers=2, train_batch_size=5, test_batch_size=5, state_dim=30, n_
 def main(opt):
 
     train_dataset = Dataset(
-        "data/traindata/train", True)
+        "data/traindata", True)
     train_dataloader = PrivacyDataloader(train_dataset, batch_size=5, shuffle=True, num_workers=2)
 
     test_dataset = Dataset(
-        "data/traindata/test", True)
+        "data/traindata", False)
     test_dataloader = PrivacyDataloader(test_dataset, batch_size=5, shuffle=True, num_workers=2)
 
-
     opt.annotation_dim = 1  # for bAbI
     if opt.training:
         opt.n_edge_types = train_dataset.n_edge_types
@@ -105,7 +105,7 @@ def main(opt):
 
     net = GGNN(opt)
     net.double()
-    print(net)
+    # print(net)
 
     criterion = nn.CrossEntropyLoss()
 
@@ -125,10 +125,8 @@ def main(opt):
         # writer = SummaryWriter(opt.log_path)
     else:
         writer = None
-    opt.training = True
-    print(opt)
+    print(net)
 
-    # embedding_matrix = train_dataset.embedding_matrix
     if opt.training:
         for epoch in range(epoch + 1, epoch + opt.niter):
             train(epoch, train_dataloader, net, criterion, optimizer, opt, writer)
@@ -140,8 +138,25 @@ def main(opt):
             net = torch.load(filename)
             net.cuda()
             optimizer = optim.Adam(net.parameters(), lr=opt.lr)
+        print(opt)
         test(test_dataloader, net, criterion, optimizer, opt)
 
 
-if __name__ == '__main__':
+def test_gnn():
+    opt.directory = "data/traindata"
+    opt.training = False
+    opt.testing = True
+    opt.model_path = 'model/model_bk/model.ckpt'
+    main(opt)
+
+
+def train_gnn():
     main(opt)
+
+
+if __name__ == '__main__':
+    train_gnn()
+
+"""
+[-0.5253, -0.7534,  0.6765, -1.0767,  0.7319, -2.6533,  0.5728]
+"""