Sfoglia il codice sorgente

抽取sgns.wiki.word的配置项

郭超 4 anni fa
parent
commit
56f0260903

+ 9 - 3
src/main/java/com/mooctest/cluster/ClusterAnalyzer.java

@@ -3,7 +3,9 @@ package com.mooctest.cluster;
 import com.hankcs.hanlp.mining.word2vec.DocVectorModel;
 import com.hankcs.hanlp.mining.word2vec.WordVectorModel;
 import com.mooctest.data.BugDTO;
+import lombok.Data;
 import lombok.Getter;
+import org.springframework.beans.factory.annotation.Value;
 import smile.clustering.GMeans;
 import smile.clustering.HierarchicalClustering;
 import smile.clustering.MEC;
@@ -16,10 +18,14 @@ import java.util.*;
 import java.util.stream.Collectors;
 
 @Getter
+@Data
 public class ClusterAnalyzer<K> {
 
     protected Map<K, Doc<K>> documents;
     protected List<K> idList;
+    
+    @Value("${path.word}")
+    private String wordPath;
 
     public ClusterAnalyzer() {
         documents = new HashMap<>();
@@ -33,7 +39,7 @@ public class ClusterAnalyzer<K> {
     }
 
     public List<Set<K>> gmeans(List<BugDTO> bugs) throws IOException {
-        DocVectorModel docVectorModel2 = new DocVectorModel(new WordVectorModel("/project/sgns.wiki.word"));
+        DocVectorModel docVectorModel2 = new DocVectorModel(new WordVectorModel(wordPath));
         double[][] data = new double[bugs.size()][];
         for (int i = 0; i < bugs.size(); i++) {
             float[] vec = docVectorModel2.query(bugs.get(i).getDescription()).getElementArray();
@@ -46,7 +52,7 @@ public class ClusterAnalyzer<K> {
     }
 
     public List<Set<K>> sib(List<BugDTO> bugs) throws IOException {
-        DocVectorModel docVectorModel2 = new DocVectorModel(new WordVectorModel("/project/sgns.wiki.word"));
+        DocVectorModel docVectorModel2 = new DocVectorModel(new WordVectorModel(wordPath));
         double[][] data = new double[bugs.size()][];
         for (int i = 0; i < bugs.size(); i++) {
             float[] vec = docVectorModel2.query(bugs.get(i).getDescription()).getElementArray();
@@ -59,7 +65,7 @@ public class ClusterAnalyzer<K> {
     }
 
     public List<Set<K>> mec(List<BugDTO> bugs) throws IOException {
-        DocVectorModel docVectorModel2 = new DocVectorModel(new WordVectorModel("/project/sgns.wiki.word"));
+        DocVectorModel docVectorModel2 = new DocVectorModel(new WordVectorModel(wordPath));
         double[][] data = new double[bugs.size()][];
         for (int i = 0; i < bugs.size(); i++) {
             float[] vec = docVectorModel2.query(bugs.get(i).getDescription()).getElementArray();

+ 36 - 30
src/main/java/com/mooctest/util/Doc2VecUtil.java

@@ -1,30 +1,36 @@
-package com.mooctest.util;
-
-import com.hankcs.hanlp.mining.word2vec.DocVectorModel;
-import com.hankcs.hanlp.mining.word2vec.WordVectorModel;
-
-import java.io.IOException;
-
-public class Doc2VecUtil {
-
-    private static volatile DocVectorModel docVectorModel;
-
-
-    public static DocVectorModel loadModel() {
-        if (docVectorModel == null) {
-            synchronized (Doc2VecUtil.class) {
-                if (docVectorModel == null) {
-                    try {
-//                        docVectorModel = new DocVectorModel(new WordVectorModel("D:\\work\\project\\yanbaoronghe\\data\\sgns.wiki.word"));
-//                        docVectorModel = new DocVectorModel(new  WordVectorModel("/Users/guochao/Desktop/project/data/sgns.wiki.word"));
-                        docVectorModel = new DocVectorModel(new  WordVectorModel("/project/sgns.wiki.word"));
-                    } catch (IOException e) {
-                        e.printStackTrace();
-                    }
-                }
-            }
-        }
-        return docVectorModel;
-    }
-
-}
+package com.mooctest.util;
+
+import com.hankcs.hanlp.mining.word2vec.DocVectorModel;
+import com.hankcs.hanlp.mining.word2vec.WordVectorModel;
+
+import java.io.IOException;
+
+public class Doc2VecUtil {
+
+//    private static String wordPath;
+//
+//    @Value("${path.word}")
+//    public void setWordPath(String wordPath) {
+//        Doc2VecUtil.wordPath = wordPath;
+//    }
+
+    private static volatile DocVectorModel docVectorModel;
+
+    public static DocVectorModel loadModel() {
+        if (docVectorModel == null) {
+            synchronized (Doc2VecUtil.class) {
+                if (docVectorModel == null) {
+                    try {
+//                        docVectorModel = new DocVectorModel(new WordVectorModel("D:\\work\\project\\yanbaoronghe\\data\\sgns.wiki.word"));
+//                        docVectorModel = new DocVectorModel(new  WordVectorModel("/Users/guochao/Desktop/project/data/sgns.wiki.word"));
+                        docVectorModel = new DocVectorModel(new  WordVectorModel("/project/sgns.wiki.word"));
+                    } catch (IOException e) {
+                        e.printStackTrace();
+                    }
+                }
+            }
+        }
+        return docVectorModel;
+    }
+
+}

+ 10 - 1
src/main/resources/application.yml

@@ -51,6 +51,9 @@ baseurl:
 
 useOss: true
 #filePath: /Users/guochao/Desktop/xinchuang/json/
+
+path:
+  word: /Users/guochao/Desktop/project/data/sgns.wiki.word
 ---
 spring:
   profiles: private-localhost
@@ -78,6 +81,9 @@ save:
   json: /json/
   image: /image/
 
+path:
+  word: /Users/guochao/Desktop/project/data/sgns.wiki.word
+
 ---
 spring:
   profiles: private-cloud
@@ -103,4 +109,7 @@ useOss: false
 save:
   path: /var/www/xinchuang
   json: /json/
-  image: /image/
+  image: /image/
+
+path:
+  word: /project/sgns.wiki.word

+ 2 - 0
src/test/java/com/mooctest/demo/DemoTrain.java

@@ -2,9 +2,11 @@ package com.mooctest.demo;
 
 import com.hankcs.hanlp.mining.word2vec.DocVectorModel;
 import com.hankcs.hanlp.mining.word2vec.WordVectorModel;
+import org.springframework.stereotype.Component;
 
 import java.io.IOException;
 
+@Component
 public class DemoTrain {
     public static void main(String[] args) throws IOException {
 //        Word2VecTrainer trainerBuilder = new Word2VecTrainer();