Bläddra i källkod

更新训练测试代码

wendaojidian 2 år sedan
förälder
incheckning
57089b36fa

+ 1 - 1
.idea/GnnForPrivacyScan.iml

@@ -4,7 +4,7 @@
     <content url="file://$MODULE_DIR$">
       <sourceFolder url="file://$MODULE_DIR$" isTestSource="false" />
     </content>
-    <orderEntry type="inheritedJdk" />
+    <orderEntry type="jdk" jdkName="Python 3.7 (py36) (2)" jdkType="Python SDK" />
     <orderEntry type="sourceFolder" forTests="false" />
   </component>
 </module>

+ 1 - 1
.idea/misc.xml

@@ -1,6 +1,6 @@
 <?xml version="1.0" encoding="UTF-8"?>
 <project version="4">
-  <component name="ProjectRootManager" version="2" project-jdk-name="Python 3.9 (base)" project-jdk-type="Python SDK" />
+  <component name="ProjectRootManager" version="2" project-jdk-name="Python 3.7 (py36) (2)" project-jdk-type="Python SDK" />
   <component name="PyCharmProfessionalAdvertiser">
     <option name="shown" value="true" />
   </component>

+ 1 - 1
data/traindata/test/visualize.txt

@@ -5593,7 +5593,7 @@
 1 48 1 28 87
 28 87 1 29 77
 29 77 1 10 69
-? 7 visualize/Visualization-of-popular-algorithms-in-Python/Topological Sort/topological_sort_4.py
+? 7 visualize/Visualization-of-popular-algorithms-in-Python/TopologicalSort/topological_sort_4.py
 
 1 48 1 2 10
 2 10 1 3 9

+ 2 - 1
dataloader/dataset.py

@@ -13,6 +13,7 @@ def load_from_directory(path):
     for file_name in os.listdir(path):
         with open(path + "/" + file_name, 'r') as file:
             for line in file:
+                print(line)
                 if len(line.strip()) == 0:
                     node_id_data_list.append([node_id, graph_type])
                     node_type_data_list.append([node_type, graph_type])
@@ -126,6 +127,6 @@ if __name__ == '__main__':
     #     "/Users/liufan/program/PYTHON/sap2nd/GnnForPrivacyScan/data/traindata/train/Directory.txt")
     # a = 5
     bi = Dataset(
-        "/Users/liufan/program/PYTHON/sap2nd/GnnForPrivacyScan/data/traindata/train", True)
+        "I:\Program\Python\sap\GnnForPrivacyScan\data\\traindata\\train", True)
     for data in bi:
         a = 5

+ 0 - 0
logs/__init__.py


BIN
logs/run-001/events.out.tfevents.1665307264.DESKTOP-CA52H9H


BIN
logs/run-002/events.out.tfevents.1665308717.DESKTOP-CA52H9H


BIN
model/model_bk/model.ckpt


BIN
model/model_bk/model.ckpt.0


BIN
model/model_bk/model.ckpt.1


BIN
model/model_bk/model.ckpt.2


BIN
model/model_bk/model.ckpt.3


BIN
model/model_bk/model.ckpt.4


BIN
model/model_bk/model.ckpt.5


BIN
model/model_bk/model.ckpt.6


BIN
model/model_bk/model.ckpt.7


BIN
model/model_bk/model.ckpt.8


BIN
model/model_bk/model.ckpt.9


+ 1 - 1
train/model.py

@@ -112,7 +112,7 @@ class GGNN(nn.Module):
             nn.Linear(opt.state_dim, opt.n_hidden),
             nn.LeakyReLU(),
             nn.Linear(opt.n_hidden, opt.n_classes),
-            nn.Softmax(dim=1)
+            # nn.Softmax(dim=1)
         )
 
         # self.class_prediction = nn.Sequential(

+ 53 - 0
train/test.py

@@ -0,0 +1,53 @@
+import torch
+from torch.autograd import Variable
+from shutil import copyfile
+from sklearn.metrics import classification_report, confusion_matrix, accuracy_score
+
+
+
+def test(dataloader, net, criterion, optimizer, opt):
+    test_loss = 0
+    correct = 0
+    net.eval()
+
+    all_targets = []
+    all_predicted = []
+
+    for i, (adj_matrix, embedding_matrix, target) in enumerate(dataloader, 0):
+        # padding = torch.zeros(len(annotation), opt.n_node, opt.state_dim - opt.annotation_dim).double()
+        # init_input = torch.cat((annotation, padding), 2)
+        # init_input = torch.zeros(len(adj_matrix), opt.n_node, opt.state_dim).double()
+        init_input = embedding_matrix
+        if opt.cuda:
+            init_input = init_input.cuda()
+            adj_matrix = adj_matrix.cuda()
+            # annotation = annotation.cuda()
+            target = target.cuda()
+
+        init_input = Variable(init_input)
+        adj_matrix = Variable(adj_matrix)
+        # annotation = Variable(annotation)
+        target = Variable(target)
+        # print(target)
+        output = net(init_input, adj_matrix)
+        # print(output)
+        # test_loss += criterion(output, target).data[0]
+        test_loss += criterion(output, target).item()
+
+        pred = output.data.max(1, keepdim=True)[1]
+        # print(pred)
+
+        all_predicted.extend(pred.data.view_as(target).cpu().numpy())
+        all_targets.extend(target.cpu().numpy())
+
+        correct += pred.eq(target.data.view_as(pred)).cpu().sum()
+
+    test_loss /= len(dataloader.dataset)
+
+    print('Accuracy:', accuracy_score(all_targets, all_predicted))
+    print(classification_report(all_targets, all_predicted))
+    print(confusion_matrix(all_targets, all_predicted))
+
+    print('Test set: Average loss: {:.4f}, Accuracy: {}/{} ({:.0f}%)'.format(
+        test_loss, correct, len(dataloader.dataset),
+        100. * correct / len(dataloader.dataset)))

+ 8 - 8
train/train.py

@@ -7,18 +7,19 @@ from dataloader.dataset import Dataset
 
 
 def train(epoch, dataloader, net, criterion, optimizer, opt, writer):
+    print("------------training_epoch: ", epoch, "----------------------------")
     for i, (adj_matrix, embedding_matrix, target) in enumerate(dataloader, 0):
-        print("----------------")
+        # print("---------", i, "-----------")
         net.zero_grad()
-        print(embedding_matrix)
+        # print(embedding_matrix)
         # padding = torch.zeros(len(annotation), opt.n_node, opt.state_dim - opt.annotation_dim).double()
         # init_input = torch.cat((annotation, padding), 2)
         # init_input = torch.zeros(len(adj_matrix), opt.n_node, opt.state_dim).double()
 
         # init_input = torch.from_numpy(embedding_matrix).double()
         init_input = embedding_matrix
-        print(init_input.shape)
-        print(init_input)
+        # print("input_shape", init_input.shape)
+        # print(init_input)
         if opt.cuda:
             init_input = init_input.cuda()
             adj_matrix = adj_matrix.cuda()
@@ -30,16 +31,15 @@ def train(epoch, dataloader, net, criterion, optimizer, opt, writer):
         # annotation = Variable(annotation)
         target = Variable(target)
         output = net(init_input, adj_matrix)
-        print(output.shape)
-        print(target.shape)
+        # print("ouput_shape", output.shape)
+        # print("target_shape", target.shape)
         # print(output)
         # print(target)
         loss = criterion(output, target)
         loss.backward()
         optimizer.step()
 
-        print(loss)
-        print(epoch)
+        print("loss", loss)
         writer.add_scalar('loss', loss.data.item(), int(epoch))
         if i % int(len(dataloader) / 10 + 1) == 0 and opt.verbal:
             print('[%d/%d][%d/%d] Loss: %.4f' % (epoch, opt.niter, i, len(dataloader), loss.item()))

+ 30 - 9
traingnn.py

@@ -10,6 +10,7 @@ from tensorboardX import SummaryWriter
 from dataloader.dataloader import PrivacyDataloader
 from dataloader.dataset import Dataset
 from train.model import GGNN
+from train.test import test
 from train.train import train
 
 parser = argparse.ArgumentParser()
@@ -18,9 +19,9 @@ parser.add_argument('--train_batch_size', type=int, default=5, help='input batch
 parser.add_argument('--test_batch_size', type=int, default=5, help='input batch size')
 parser.add_argument('--state_dim', type=int, default=106, help='GGNN hidden state size')
 parser.add_argument('--n_steps', type=int, default=10, help='propogation steps number of GGNN')
-parser.add_argument('--niter', type=int, default=150, help='number of epochs to train for')
+parser.add_argument('--niter', type=int, default=10, help='number of epochs to train for')
 parser.add_argument('--lr', type=float, default=0.01, help='learning rate')
-parser.add_argument('--cuda', action='store_true', help='enables cuda')
+parser.add_argument('--cuda', type=bool, default=True, help='enables cuda')
 parser.add_argument('--verbal', type=bool, default=True, help='print training info or not')
 parser.add_argument('--manualSeed', type=int, help='manual seed')
 parser.add_argument('--n_classes', type=int, default=7, help='manual seed')
@@ -29,13 +30,13 @@ parser.add_argument('--model_path', default="model/model.ckpt", help='path to sa
 parser.add_argument('--n_hidden', type=int, default=50, help='number of hidden layers')
 parser.add_argument('--size_vocabulary', type=int, default=108, help='maximum number of node types')
 parser.add_argument('--is_training_ggnn', type=bool, default=True, help='Training GGNN or BiGGNN')
-parser.add_argument('--training', action="store_true", help='is training')
-parser.add_argument('--testing', action="store_true", help='is testing')
+parser.add_argument('--training', type=bool, default=True, help='is training')
+parser.add_argument('--testing', type=bool, default=False, help='is testing')
 parser.add_argument('--training_percentage', type=float, default=1.0, help='percentage of data use for training')
 parser.add_argument('--log_path', default="logs/", help='log path for tensorboard')
 parser.add_argument('--epoch', type=int, default=5, help='epoch to test')
-parser.add_argument('--n_edge_types', type=int, default=65, help='edge types')
-parser.add_argument('--n_node', type=int, default=100, help='node types')
+parser.add_argument('--n_edge_types', type=int, default=1, help='edge types')
+parser.add_argument('--n_node', type=int, help='node types')
 
 opt = parser.parse_args()
 
@@ -55,16 +56,28 @@ opt Namespace(workers=2, train_batch_size=5, test_batch_size=5, state_dim=30, n_
 def main(opt):
 
     train_dataset = Dataset(
-        "/Users/liufan/program/PYTHON/sap2nd/GnnForPrivacyScan/data/traindata/train", True)
+        "data/traindata/train", True)
     train_dataloader = PrivacyDataloader(train_dataset, batch_size=5, shuffle=True, num_workers=2)
 
+    test_dataset = Dataset(
+        "data/traindata/test", True)
+    test_dataloader = PrivacyDataloader(test_dataset, batch_size=5, shuffle=True, num_workers=2)
+
+
     opt.annotation_dim = 1  # for bAbI
     if opt.training:
         opt.n_edge_types = train_dataset.n_edge_types
         opt.n_node = train_dataset.n_node_by_id
+    else:
+        opt.n_edge_types = test_dataset.n_edge_types
+        opt.n_node = test_dataset.n_node_by_id
 
-    filename = opt.model_path
-    epoch = -1
+    if opt.testing:
+        filename = "{}.{}".format(opt.model_path, opt.epoch)
+        epoch = opt.epoch
+    else:
+        filename = opt.model_path
+        epoch = -1
 
     if os.path.exists(filename):
         if opt.testing:
@@ -121,6 +134,14 @@ def main(opt):
             train(epoch, train_dataloader, net, criterion, optimizer, opt, writer)
         writer.close()
 
+    if opt.testing:
+        filename = "{}.{}".format(opt.model_path, epoch)
+        if os.path.exists(filename):
+            net = torch.load(filename)
+            net.cuda()
+            optimizer = optim.Adam(net.parameters(), lr=opt.lr)
+        test(test_dataloader, net, criterion, optimizer, opt)
+
 
 if __name__ == '__main__':
     main(opt)