Sfoglia il codice sorgente

1.去除主站依赖
2.图片下载合并到主流程,计算异步化,前端轮询任务状态
3.计算时不再删除全表,而是删除指定任务的结果

梅杰 6 anni fa
parent
commit
c48cfcf5b9
24 file modificati con 590 aggiunte e 220 eliminazioni
  1. 5 7
      pom.xml
  2. 15 136
      src/main/java/com/mooctest/controller/AggController.java
  3. 13 3
      src/main/java/com/mooctest/controller/GraphController.java
  4. 12 0
      src/main/java/com/mooctest/dao/AggTaskStatusDao.java
  5. 2 0
      src/main/java/com/mooctest/dao/MasterReportDao.java
  6. 2 0
      src/main/java/com/mooctest/dao/SupplementDao.java
  7. 7 0
      src/main/java/com/mooctest/event/Event.java
  8. 42 0
      src/main/java/com/mooctest/event/EventUtil.java
  9. 18 0
      src/main/java/com/mooctest/event/TaskEndEvent.java
  10. 22 0
      src/main/java/com/mooctest/event/TaskEndListener.java
  11. 18 0
      src/main/java/com/mooctest/event/TaskStartEvent.java
  12. 36 0
      src/main/java/com/mooctest/event/TaskStartListener.java
  13. 23 8
      src/main/java/com/mooctest/image/ImageDownload.java
  14. 34 0
      src/main/java/com/mooctest/model/AggTaskStatus.java
  15. 57 0
      src/main/java/com/mooctest/service/AggTaskStatusService.java
  16. 182 0
      src/main/java/com/mooctest/service/AggregationService.java
  17. 2 1
      src/main/java/com/mooctest/service/BugReportService.java
  18. 14 5
      src/main/java/com/mooctest/service/DiffImgService.java
  19. 5 5
      src/main/java/com/mooctest/service/GraphService.java
  20. 2 2
      src/main/java/com/mooctest/service/MasterReportService.java
  21. 18 7
      src/main/java/com/mooctest/service/SupplementService.java
  22. 5 0
      src/main/java/com/mooctest/service/impl/BugReportServiceImpl.java
  23. 2 2
      src/main/java/com/mooctest/util/ImageUtil.java
  24. 54 44
      src/main/resources/static/js/app_info.js

+ 5 - 7
pom.xml

@@ -117,13 +117,6 @@
             <version>5.1.21</version>
         </dependency>
 
-        <!-- https://mvnrepository.com/artifact/net.sourceforge.tess4j/tess4j -->
-        <dependency>
-            <groupId>net.sourceforge.tess4j</groupId>
-            <artifactId>tess4j</artifactId>
-            <version>3.4.8</version>
-        </dependency>
-
 
         <!-- druid数据库连接池 -->
         <dependency>
@@ -142,6 +135,11 @@
             <artifactId>spring-boot-configuration-processor</artifactId>
             <optional>true</optional>
         </dependency>
+        <dependency>
+            <groupId>com.google.guava</groupId>
+            <artifactId>guava</artifactId>
+            <version>19.0</version>
+        </dependency>
 
     </dependencies>
 

+ 15 - 136
src/main/java/com/mooctest/controller/AggController.java

@@ -2,11 +2,14 @@ package com.mooctest.controller;
 
 import com.mooctest.cluster.ClusterAnalyzer;
 import com.mooctest.cluster.Group;
+import com.mooctest.event.EventUtil;
+import com.mooctest.event.TaskStartEvent;
 import com.mooctest.image.ImageDownload;
 import com.mooctest.data.BugDTO;
 import com.mooctest.data.DiffImg;
 import com.mooctest.data.DiffText;
 import com.mooctest.data.ReportDTO;
+import com.mooctest.model.AggTaskStatus;
 import com.mooctest.model.MasterReport;
 import com.mooctest.nlp.DistanceMatrix;
 import com.mooctest.service.*;
@@ -34,15 +37,12 @@ public class AggController {
     @Autowired
     MasterReportService masterReportService;
 
-    @Autowired
-    DiffTextService diffTextService;
 
     @Autowired
-    DiffImgService diffImgService;
+    AggTaskStatusService aggTaskStatusService;
 
     @Autowired
-    SupplementService supplementService;
-
+    EventUtil eventUtil;
     @GetMapping("/aggregate_info")
     public MasterReport getAggInfo(@RequestParam("bugId") String bugId) {
         return masterReportService.getByBugId(bugId);
@@ -51,146 +51,25 @@ public class AggController {
     @GetMapping("/aggregate")
     public String aggregate(@RequestParam("examId") long examId,
                             @RequestParam("caseId") long caseId) throws IOException {
-        List<ReportDTO> reports = bugReportService.getReports(examId, caseId);
-//        List<ReportDTO> reports = bugReportService.getReports(2614, 1490);
-        List<BugDTO> bugs = bugReportService.mergeAllBugs(reports);
-
-        ClusterAnalyzer<String> analyzer = new ClusterAnalyzer<>();
-        List<String> bugIds = bugs.stream().map(BugDTO::getId).collect(Collectors.toList());
-        double[][] distMatrix = DistanceMatrix.genHybridDist(bugs);
-        List<Set<String>> clusters = analyzer.HAC(distMatrix, bugIds, 0.23);
-
-        Map<String, BugDTO> bugMap = bugs.stream().collect(toMap(BugDTO::getId, Function.identity()));
-        bugs = null;
-        Map<String, Set<String>> masterClusterMap = new HashMap<>();
-        masterReportService.deleteAll();
-        for (Set<String> cluster : clusters) {
-            String masterReport = masterReportService.findMasterReport(cluster, bugMap);
-            masterReportService.saveMasterReport(masterReport, examId, caseId, cluster);
-            masterClusterMap.put(masterReport, cluster);
-        }
-        clusters = null;
-
-        showClusterResult(masterClusterMap, bugMap);
-
-        //gen diff text
-        Map<String, List<DiffText>> masterDiffTextMap = diffTextService.genMasterDiffTextMap(masterClusterMap, bugMap);
-
-        showTextDiffResult(masterDiffTextMap, bugMap);
-
-        //gen diff img
-        Map<String, List<DiffImg>> masterDiffImgMap = diffImgService.genMasterDiffImgMap(masterClusterMap, bugMap);
-
-        //cluster diff text
-        Map<String, List<Group<String, DiffText>>> masterDiffTextClustersMap = diffTextService.genDiffTextClusters(masterDiffTextMap);
-
-        //cluster diff img
-        Map<String, List<Group<String, DiffImg>>> masterDiffImgClustersMap = diffImgService.genDiffImgClusters(masterDiffImgMap);
-
-        //combine cluster
-        combineCluster(masterDiffTextClustersMap, masterDiffImgClustersMap);
-
-        //top
-        supplementService.rankAndStoreDiffText(masterDiffTextClustersMap);
-        supplementService.rankAndStoreDiffImg(masterDiffImgClustersMap);
+        TaskStartEvent e = new TaskStartEvent(examId, caseId);
+        eventUtil.post(e);
 
         return "ok";
     }
 
-    public  void combineCluster(Map<String, List<Group<String, DiffText>>> masterDiffTextClustersMap,
-                                              Map<String, List<Group<String, DiffImg>>> masterDiffImgClustersMap) {
-        double LINK_THRESHOLD = 0.2;
-        masterDiffTextClustersMap.forEach((masterId, diffTextClusters) -> {
-            List<Group<String, DiffImg>> diffImgClusters = masterDiffImgClustersMap.get(masterId);
-            if (diffImgClusters != null) {
-
-                for (int i = 0; i < diffTextClusters.size(); i++) {
-                    Set<String> textBugIds = diffTextClusters.get(i).getCluster().stream().map(DiffText::getBugId).collect(Collectors.toSet());
-                    for (int j = 0; j < diffImgClusters.size(); j++) {
-                        Set<String> imgBugIds = diffImgClusters.get(j).getCluster().stream().map(DiffImg::getBugId).collect(Collectors.toSet());
-                        int inter = CollectionUtils.intersection(textBugIds, imgBugIds).size();
-                        int union = CollectionUtils.union(textBugIds, imgBugIds).size();
-                        double dist = 1.0 - (1.0 * inter) / (1.0 * union);
-                        if (dist > LINK_THRESHOLD) {
-                            diffImgClusters.get(j).setId(diffTextClusters.get(i).getId());
-                        }
-                    }
-                }
-            }
-        });
-    }
-    public void showClusterResult(Map<String, Set<String>> masterClusterMap, Map<String, BugDTO> bugMap) {
-        System.out.println("showMasterReportResult:");
-        masterClusterMap.entrySet().forEach(entry -> {
-            System.out.println("[");
-            System.out.println("master:");
-            System.out.println(entry.getKey() + " " + bugMap.get(entry.getKey()).getDescription());
-            System.out.println();
-
-            entry.getValue().forEach(bugId -> {
-                System.out.println(bugId + " " + bugMap.get(bugId).getDescription());
-            });
-            System.out.println("]");
-        });
-
+    @GetMapping("/agg_task_status")
+    public int getAggStatus(@RequestParam("examId") long examId,
+                            @RequestParam("caseId") long caseId) {
+        AggTaskStatus status = aggTaskStatusService.getByTaskId(examId, caseId);
+        if (status == null) {
+            return 0;
+        }
+        return status.getStatus();
     }
 
 
-    public void showTextDiffResult(Map<String, List<DiffText>> masterDiffTextMap, Map<String, BugDTO> bugMap) {
-        System.out.println("showDiffResult:");
-        masterDiffTextMap.entrySet().forEach(entry -> {
-            System.out.println("[");
-            System.out.println("master:");
-            System.out.println(entry.getKey() + " " + bugMap.get(entry.getKey()).getDescription());
-            System.out.println();
 
-            entry.getValue().forEach(diffText -> {
-                System.out.println(diffText.getBugId() + "_" + diffText.getIndex() + diffText.getSentence());
-            });
-            System.out.println("]");
-        });
 
-    }
-    public void showClusterResult(List<Set<String>> results, Map<String, BugDTO> bugMap) {
-        System.out.println("size: " + results.size());
-        results.forEach(cluster -> {
-            System.out.println("[");
-            cluster.forEach(bugId -> {
-                System.out.println(bugId + " " + bugMap.get(bugId).getDescription());
-            });
-            System.out.println("]");
-        });
-    }
-
-    @GetMapping("/down_img")
-    public String downImg() {
-
-        List<ReportDTO> reports = bugReportService.getReports(2613, 1489);
-        List<BugDTO> bugs = bugReportService.mergeAllBugs(reports);
-
-        bugs.forEach(bug -> {
-            if (bug.getImgUrls() != null && bug.getImgUrls().length > 0) {
-                String[] imgUrls = bug.getImgUrls();
-                for (int i = 0; i < imgUrls.length; i++) {
-                    if (imgUrls[i] == null || !imgUrls[i].startsWith("http")) {
-                        continue;
-                    }
-
-                    String fileName = bug.getId() + "_" + i;
-                    System.out.println("正在下载 " + fileName);
-                    try {
-
-                        ImageDownload.createImage(imgUrls[i], fileName);
-                    } catch (Exception e) {
-                        System.out.println(imgUrls[i]);
-                        e.printStackTrace();
-                    }
-                }
-
-            }
-        });
-        return "ok";
-    }
 
 
 }

+ 13 - 3
src/main/java/com/mooctest/controller/GraphController.java

@@ -52,12 +52,16 @@ public class GraphController {
 
     @GetMapping(value = "/graphDetail/rawReport")
     public void renderRawReport(@RequestParam("bugId") String bugId,
+                                  @RequestParam("masterId") String masterId,
                                   @RequestParam(value = "isMaster", required = false, defaultValue = "false") boolean isMaster,
                                   Model model,
                                   HttpServletRequest req,
                                   HttpServletResponse resp) throws Exception {
 
-        BugDTO bug = bugReportService.getBugById(bugId);
+        long[] ids = masterReportService.getExamIdAndCaseIdByMasterId(masterId);
+        long examId = ids[0];
+        long caseId = ids[1];
+        BugDTO bug = bugReportService.getBugById(bugId, examId, caseId);
         model.addAttribute("bugReport", bug);
         model.addAttribute("isMaster", isMaster);
 
@@ -75,7 +79,9 @@ public class GraphController {
                                 Model model) throws Exception {
 
         long[] ids = masterReportService.getExamIdAndCaseIdByMasterId(masterId);
-        Map<String, BugDTO> bugMap = bugReportService.getAllBugsMap(ids[0], ids[1]);
+        long examId = ids[0];
+        long caseId = ids[1];
+        Map<String, BugDTO> bugMap = bugReportService.getAllBugsMap(examId, caseId);
         SupplementDTO supplementDTO = supplementService.findBySupId(supId, bugMap);
         model.addAttribute("supId", supplementDTO.getSupplementId());
         model.addAttribute("supplement", supplementDTO);
@@ -92,7 +98,11 @@ public class GraphController {
                                 Model model) throws Exception {
 
         List<SupplementDTO> supplements = supplementService.getSupplementTopInfoByMasterId(masterId);
-        BugDTO masterReport = bugReportService.getBugById(masterId);
+        long[] ids = masterReportService.getExamIdAndCaseIdByMasterId(masterId);
+        long examId = ids[0];
+        long caseId = ids[1];
+
+        BugDTO masterReport = bugReportService.getBugById(masterId, examId, caseId);
         model.addAttribute("aggReportId", "ML-AG-" + masterId.substring(10));
         model.addAttribute("supplements", supplements);
         model.addAttribute("masterReport", masterReport);

+ 12 - 0
src/main/java/com/mooctest/dao/AggTaskStatusDao.java

@@ -0,0 +1,12 @@
+package com.mooctest.dao;
+
+import com.mooctest.model.AggTaskStatus;
+import org.springframework.data.jpa.repository.JpaRepository;
+
+import java.sql.Timestamp;
+import java.util.List;
+
+public interface AggTaskStatusDao extends JpaRepository<AggTaskStatus, Long> {
+    List<AggTaskStatus> findByTaskIdOrderByStartTimeDesc(String taskId);
+    AggTaskStatus findByTaskIdAndStartTime(String taskId, Timestamp startTime);
+}

+ 2 - 0
src/main/java/com/mooctest/dao/MasterReportDao.java

@@ -20,4 +20,6 @@ public interface MasterReportDao extends JpaRepository<MasterReport, Long> {
     long countByExamIdAndCaseId(long examId, long caseId);
 
     long countByExamIdAndCaseIdAndStatus(long examId, long caseId, int status);
+
+    void deleteByExamIdAndCaseId(long examId, long caseId);
 }

+ 2 - 0
src/main/java/com/mooctest/dao/SupplementDao.java

@@ -20,4 +20,6 @@ public interface SupplementDao extends JpaRepository<SupplementItem, Long> {
 
     @Query("SELECT DISTINCT s.masterId, s.bugId FROM SupplementItem s WHERE s.masterId IN (:masterIds)")
     List<Object[]> findAllBugsByMasterIds(@Param("masterIds") List<String> masterIds);
+
+    void deleteByMasterIdIn(List<String> masterIds);
 }

+ 7 - 0
src/main/java/com/mooctest/event/Event.java

@@ -0,0 +1,7 @@
+package com.mooctest.event;
+
+public interface Event {
+
+    String getDescription();
+
+}

+ 42 - 0
src/main/java/com/mooctest/event/EventUtil.java

@@ -0,0 +1,42 @@
+package com.mooctest.event;
+
+import com.google.common.eventbus.AsyncEventBus;
+import com.google.common.eventbus.EventBus;
+import com.google.common.eventbus.Subscribe;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+import org.springframework.stereotype.Component;
+
+import java.util.concurrent.Executors;
+
+@Component
+public class EventUtil {
+
+    private static Logger logger = LoggerFactory.getLogger(EventUtil.class);
+    private EventBus eventBus;
+
+    public EventUtil() {
+        eventBus = new AsyncEventBus(Executors.newCachedThreadPool());
+        eventBus.register(this);
+    }
+
+    public void register(Object listener) {
+        eventBus.register(listener);
+    }
+
+    public void unregister(Object listener) {
+        eventBus.unregister(listener);
+    }
+
+    public void post(Event event) {
+        logger.info("event fired: {}", event.getDescription());
+        eventBus.post(event);
+    }
+
+    @Subscribe
+    public void logDeadEvent(Event deadEvent) {
+        logger.info("dead event captured: {}", deadEvent.getDescription());
+    }
+
+}
+

+ 18 - 0
src/main/java/com/mooctest/event/TaskEndEvent.java

@@ -0,0 +1,18 @@
+package com.mooctest.event;
+
+import lombok.AllArgsConstructor;
+import lombok.Builder;
+import lombok.Getter;
+
+@Getter
+@Builder
+@AllArgsConstructor
+public class TaskEndEvent implements Event {
+    long examId;
+    long caseId;
+
+    @Override
+    public String getDescription() {
+        return String.format("bug report aggregate end, exam id: %d, case id: %d", examId, caseId);
+    }
+}

+ 22 - 0
src/main/java/com/mooctest/event/TaskEndListener.java

@@ -0,0 +1,22 @@
+package com.mooctest.event;
+
+import com.google.common.eventbus.Subscribe;
+import org.springframework.beans.factory.InitializingBean;
+import org.springframework.beans.factory.annotation.Autowired;
+import org.springframework.stereotype.Component;
+
+@Component
+public class TaskEndListener implements InitializingBean {
+    @Autowired
+    private EventUtil eventUtil;
+
+    @Override
+    public void afterPropertiesSet() throws Exception {
+        eventUtil.register(this);
+    }
+
+    @Subscribe
+    public void updateTaskStatus(TaskEndEvent event) {
+
+    }
+}

+ 18 - 0
src/main/java/com/mooctest/event/TaskStartEvent.java

@@ -0,0 +1,18 @@
+package com.mooctest.event;
+
+import lombok.AllArgsConstructor;
+import lombok.Builder;
+import lombok.Getter;
+
+@Getter
+@Builder
+@AllArgsConstructor
+public class TaskStartEvent implements Event {
+    private long examId;
+    private long caseId;
+
+    @Override
+    public String getDescription() {
+        return String.format("bug report aggregate start, exam id: %d, case id: %d", examId, caseId);
+    }
+}

+ 36 - 0
src/main/java/com/mooctest/event/TaskStartListener.java

@@ -0,0 +1,36 @@
+package com.mooctest.event;
+
+import com.google.common.eventbus.Subscribe;
+import com.mooctest.model.AggTaskStatus;
+import com.mooctest.service.AggTaskStatusService;
+import com.mooctest.service.AggregationService;
+import org.springframework.beans.factory.InitializingBean;
+import org.springframework.beans.factory.annotation.Autowired;
+import org.springframework.stereotype.Component;
+
+@Component
+public class TaskStartListener implements InitializingBean {
+
+    @Autowired
+    private EventUtil eventUtil;
+
+    @Autowired
+    private AggregationService aggregationService;
+
+    @Autowired
+    private AggTaskStatusService aggTaskStatusService;
+
+    @Override
+    public void afterPropertiesSet() throws Exception {
+        eventUtil.register(this);
+    }
+
+    @Subscribe
+    public void aggretate(TaskStartEvent event) {
+
+        AggTaskStatus aggTaskStatus = aggTaskStatusService.create(event.getExamId(), event.getCaseId());
+        aggregationService.aggregate(event.getExamId(), event.getCaseId());
+        AggTaskStatus result = aggTaskStatusService.updateStatus(aggTaskStatus.getTaskId(), aggTaskStatus.getStartTime());
+
+    }
+}

+ 23 - 8
src/main/java/com/mooctest/image/ImageDownload.java

@@ -12,21 +12,35 @@ import java.util.regex.Pattern;
 
 public class ImageDownload {
 
-    public static void createImage(String imgurl, String filePath) throws Exception {
+    public static void createImage(String imgurl, String taskId, String filePath) throws Exception {
 
-        URL url = new URL(encode(imgurl, "utf-8"));
+        String path = ImageUtil.IMAGE_PATH + taskId + "/" + filePath;
 
-        BufferedImage image = ImageIO.read(url);
+        File imageFile = new File(path);
+        if (checkExist(imageFile)) {
+            System.out.println("数据已存在");
+            return;
+        }
+
+        URL url = new URL(encode(imgurl, "utf-8"));
+        BufferedImage image = null;
+        int retryTimes = 3;
+        for (int i = 0; i < retryTimes; i++) {
+            try {
+                image = ImageIO.read(url);
+                break;
+            } catch (IOException e) {
+                System.out.println("image download fail " + filePath + " download times: " + (i + 1));
+                if (i+1 == retryTimes)
+                    throw e;
+            }
+        }
 //        HttpURLConnection conn = (HttpURLConnection) url.openConnection();
 //        InputStream inputStream = conn.getInputStream(); // 通过输入流获得图片数据
 //        byte[] getData = readInputStream(inputStream); // 获得图片的二进制数据
 
         FingerPrint fp = new FingerPrint(image);
-        File imageFile = new File(ImageUtil.IMAGE_PATH + filePath);
-        if (checkExist(imageFile)) {
-            System.out.println("数据已存在");
-            return;
-        }
+
 
         FileOutputStream fos = new FileOutputStream(imageFile);
         fos.write(fp.getBinaryzationMatrix());
@@ -57,6 +71,7 @@ public class ImageDownload {
         }
         return false;
     }
+
     private static String zhPattern = "[\\u4e00-\\u9fa5]+";
 
     public static String encode(String str, String charset)

+ 34 - 0
src/main/java/com/mooctest/model/AggTaskStatus.java

@@ -0,0 +1,34 @@
+package com.mooctest.model;
+
+import lombok.AllArgsConstructor;
+import lombok.Builder;
+import lombok.Data;
+import lombok.NoArgsConstructor;
+
+import javax.persistence.*;
+import java.sql.Timestamp;
+
+@Data
+@Builder
+@NoArgsConstructor
+@AllArgsConstructor
+@Entity
+@Table(name = "agg_task_status")
+public class AggTaskStatus {
+
+    @Id
+    @GeneratedValue
+    private Long id;
+
+    @Column(name = "task_id")
+    private String taskId;
+
+    @Column(name = "status")
+    private int status;
+
+    @Column(name = "start_time")
+    private Timestamp startTime;
+
+    @Column(name = "end_time")
+    private Timestamp endTime;
+}

+ 57 - 0
src/main/java/com/mooctest/service/AggTaskStatusService.java

@@ -0,0 +1,57 @@
+package com.mooctest.service;
+
+import com.mooctest.dao.AggTaskStatusDao;
+import com.mooctest.model.AggTaskStatus;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+import org.springframework.beans.factory.annotation.Autowired;
+import org.springframework.stereotype.Service;
+
+import java.sql.Timestamp;
+import java.util.Date;
+import java.util.List;
+
+@Service
+public class AggTaskStatusService {
+    private static Logger logger = LoggerFactory.getLogger(AggTaskStatusService.class);
+    @Autowired
+    AggTaskStatusDao aggTaskStatusDao;
+
+    public AggTaskStatus getByTaskId(long examId, long caseId) {
+        String aggTaskId = genTaskId(examId, caseId);
+        List<AggTaskStatus> aggTaskStatus = aggTaskStatusDao.findByTaskIdOrderByStartTimeDesc(aggTaskId);
+        if (aggTaskStatus!= null && aggTaskStatus.size() > 0) {
+            return aggTaskStatus.get(0);
+        } else {
+            return null;
+        }
+
+    }
+
+    public AggTaskStatus create(long examId, long caseId) {
+        String aggTaskId = genTaskId(examId, caseId);
+        AggTaskStatus status = AggTaskStatus.builder()
+                .taskId(aggTaskId)
+                .status(0)
+                .startTime(new Timestamp(new Date().getTime()))
+                .build();
+        return aggTaskStatusDao.save(status);
+
+    }
+
+    public AggTaskStatus updateStatus(String taskId, Timestamp startTime) {
+        AggTaskStatus aggTaskStatus = aggTaskStatusDao.findByTaskIdAndStartTime(taskId, startTime);
+        if (aggTaskStatus != null) {
+            aggTaskStatus.setStatus(1);
+            aggTaskStatus.setEndTime(new Timestamp(new Date().getTime()));
+            return aggTaskStatusDao.save(aggTaskStatus);
+        } else {
+            logger.error("AggTaskStatusService find null, taskId={}, startTime={}", taskId, startTime.getTime());
+            return null;
+        }
+    }
+
+    private String genTaskId(long examId, long caseId) {
+        return examId + "-" + caseId;
+    }
+}

+ 182 - 0
src/main/java/com/mooctest/service/AggregationService.java

@@ -0,0 +1,182 @@
+package com.mooctest.service;
+
+import com.mooctest.cluster.ClusterAnalyzer;
+import com.mooctest.cluster.Group;
+import com.mooctest.data.BugDTO;
+import com.mooctest.data.DiffImg;
+import com.mooctest.data.DiffText;
+import com.mooctest.data.ReportDTO;
+import com.mooctest.image.ImageDownload;
+import com.mooctest.nlp.DistanceMatrix;
+import org.apache.commons.collections4.CollectionUtils;
+import org.springframework.beans.factory.annotation.Autowired;
+import org.springframework.stereotype.Service;
+
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+import java.util.Set;
+import java.util.function.Function;
+import java.util.stream.Collectors;
+
+import static java.util.stream.Collectors.toMap;
+
+@Service
+public class AggregationService {
+    @Autowired
+    BugReportService bugReportService;
+
+    @Autowired
+    MasterReportService masterReportService;
+
+    @Autowired
+    DiffTextService diffTextService;
+
+    @Autowired
+    DiffImgService diffImgService;
+
+    @Autowired
+    SupplementService supplementService;
+
+    public void aggregate(long examId, long caseId) {
+        downImg(examId, caseId);
+        List<ReportDTO> reports = bugReportService.getReports(examId, caseId);
+//        List<ReportDTO> reports = bugReportService.getReports(2614, 1490);
+        List<BugDTO> bugs = bugReportService.mergeAllBugs(reports);
+
+        ClusterAnalyzer<String> analyzer = new ClusterAnalyzer<>();
+        List<String> bugIds = bugs.stream().map(BugDTO::getId).collect(Collectors.toList());
+        double[][] distMatrix = DistanceMatrix.genHybridDist(bugs);
+        List<Set<String>> clusters = analyzer.HAC(distMatrix, bugIds, 0.23);
+
+        Map<String, BugDTO> bugMap = bugs.stream().collect(toMap(BugDTO::getId, Function.identity()));
+        bugs = null;
+        Map<String, Set<String>> masterClusterMap = new HashMap<>();
+        masterReportService.deleteAll(examId, caseId);
+        for (Set<String> cluster : clusters) {
+            String masterReport = masterReportService.findMasterReport(cluster, bugMap);
+            masterReportService.saveMasterReport(masterReport, examId, caseId, cluster);
+            masterClusterMap.put(masterReport, cluster);
+        }
+        clusters = null;
+
+        showClusterResult(masterClusterMap, bugMap);
+
+        //gen diff text
+        Map<String, List<DiffText>> masterDiffTextMap = diffTextService.genMasterDiffTextMap(masterClusterMap, bugMap);
+
+        showTextDiffResult(masterDiffTextMap, bugMap);
+
+        //gen diff img
+        Map<String, List<DiffImg>> masterDiffImgMap = diffImgService.genMasterDiffImgMap(masterClusterMap, bugMap);
+
+        //cluster diff text
+        Map<String, List<Group<String, DiffText>>> masterDiffTextClustersMap = diffTextService.genDiffTextClusters(masterDiffTextMap);
+
+        //cluster diff img
+        Map<String, List<Group<String, DiffImg>>> masterDiffImgClustersMap = diffImgService.genDiffImgClusters(masterDiffImgMap);
+
+        //combine cluster
+        combineCluster(masterDiffTextClustersMap, masterDiffImgClustersMap);
+
+        //top
+        supplementService.deleteAll(examId, caseId);
+        supplementService.rankAndStoreDiffText(masterDiffTextClustersMap);
+        supplementService.rankAndStoreDiffImg(masterDiffImgClustersMap);
+    }
+
+    private void combineCluster(Map<String, List<Group<String, DiffText>>> masterDiffTextClustersMap,
+                                Map<String, List<Group<String, DiffImg>>> masterDiffImgClustersMap) {
+        double LINK_THRESHOLD = 0.2;
+        masterDiffTextClustersMap.forEach((masterId, diffTextClusters) -> {
+            List<Group<String, DiffImg>> diffImgClusters = masterDiffImgClustersMap.get(masterId);
+            if (diffImgClusters != null) {
+
+                for (int i = 0; i < diffTextClusters.size(); i++) {
+                    Set<String> textBugIds = diffTextClusters.get(i).getCluster().stream().map(DiffText::getBugId).collect(Collectors.toSet());
+                    for (int j = 0; j < diffImgClusters.size(); j++) {
+                        Set<String> imgBugIds = diffImgClusters.get(j).getCluster().stream().map(DiffImg::getBugId).collect(Collectors.toSet());
+                        int inter = CollectionUtils.intersection(textBugIds, imgBugIds).size();
+                        int union = CollectionUtils.union(textBugIds, imgBugIds).size();
+                        double dist = 1.0 - (1.0 * inter) / (1.0 * union);
+                        if (dist > LINK_THRESHOLD) {
+                            diffImgClusters.get(j).setId(diffTextClusters.get(i).getId());
+                        }
+                    }
+                }
+            }
+        });
+    }
+
+    private void downImg(long examId, long caseId) {
+
+        String taskId = examId + "_" + caseId;
+        List<ReportDTO> reports = bugReportService.getReports(examId, caseId);
+        List<BugDTO> bugs = bugReportService.mergeAllBugs(reports);
+
+        bugs.forEach(bug -> {
+            if (bug.getImgUrls() != null && bug.getImgUrls().length > 0) {
+                String[] imgUrls = bug.getImgUrls();
+                for (int i = 0; i < imgUrls.length; i++) {
+                    if (imgUrls[i] == null || !imgUrls[i].startsWith("http")) {
+                        continue;
+                    }
+
+                    String fileName = bug.getId() + "_" + i;
+                    System.out.println("正在下载 " + fileName);
+                    try {
+
+                        ImageDownload.createImage(imgUrls[i], taskId, fileName);
+                    } catch (Exception e) {
+                        System.out.println(imgUrls[i]);
+                        e.printStackTrace();
+                    }
+                }
+
+            }
+        });
+    }
+
+    private void showClusterResult(Map<String, Set<String>> masterClusterMap, Map<String, BugDTO> bugMap) {
+        System.out.println("showMasterReportResult:");
+        masterClusterMap.entrySet().forEach(entry -> {
+            System.out.println("[");
+            System.out.println("master:");
+            System.out.println(entry.getKey() + " " + bugMap.get(entry.getKey()).getDescription());
+            System.out.println();
+
+            entry.getValue().forEach(bugId -> {
+                System.out.println(bugId + " " + bugMap.get(bugId).getDescription());
+            });
+            System.out.println("]");
+        });
+
+    }
+
+
+    private void showTextDiffResult(Map<String, List<DiffText>> masterDiffTextMap, Map<String, BugDTO> bugMap) {
+        System.out.println("showDiffResult:");
+        masterDiffTextMap.entrySet().forEach(entry -> {
+            System.out.println("[");
+            System.out.println("master:");
+            System.out.println(entry.getKey() + " " + bugMap.get(entry.getKey()).getDescription());
+            System.out.println();
+
+            entry.getValue().forEach(diffText -> {
+                System.out.println(diffText.getBugId() + "_" + diffText.getIndex() + diffText.getSentence());
+            });
+            System.out.println("]");
+        });
+
+    }
+    private void showClusterResult(List<Set<String>> results, Map<String, BugDTO> bugMap) {
+        System.out.println("size: " + results.size());
+        results.forEach(cluster -> {
+            System.out.println("[");
+            cluster.forEach(bugId -> {
+                System.out.println(bugId + " " + bugMap.get(bugId).getDescription());
+            });
+            System.out.println("]");
+        });
+    }
+}

+ 2 - 1
src/main/java/com/mooctest/service/BugReportService.java

@@ -11,6 +11,7 @@ public interface BugReportService {
     List<BugDTO> mergeAllBugs(List<ReportDTO> reportDTOs);
     List<BugDTO> getAllBugs(long examId, long caseId);
     Map<String, BugDTO> getAllBugsMap(long examId, long caseId);
-    BugDTO getBugById(String bugId);
+//    BugDTO getBugById(String bugId);
+    BugDTO getBugById(String bugId, long examId, long caseId);
 //    Map<String, List<BugDTO>> getMasterBugMap(Map<String, List<String>> masterBugIdsMap, Map<String, BugDTO> bugsMap);
 }

+ 14 - 5
src/main/java/com/mooctest/service/DiffImgService.java

@@ -7,6 +7,7 @@ import com.mooctest.data.BugDTO;
 import com.mooctest.data.DiffImg;
 import com.mooctest.nlp.DistanceMatrix;
 import com.mooctest.util.IndexUtil;
+import org.springframework.beans.factory.annotation.Autowired;
 import org.springframework.stereotype.Service;
 
 import java.io.File;
@@ -20,6 +21,9 @@ import static java.util.stream.Collectors.toMap;
 @Service
 public class DiffImgService {
 
+    @Autowired
+    MasterReportService masterReportService;
+
     public Map<String, List<Group<String, DiffImg>>> genDiffImgClusters(Map<String, List<DiffImg>> masterDiffImgMap) {
         ClusterAnalyzer<String> analyzer = new ClusterAnalyzer<>();
         Map<String, List<Group<String, DiffImg>>> masterDiffImgClustersMap = new HashMap<>();
@@ -71,18 +75,23 @@ public class DiffImgService {
             Set<String> cluster,
             Map<String, BugDTO> bugMap) {
 
+        long[] ids = masterReportService.getExamIdAndCaseIdByMasterId(masterId);
+        long examId = ids[0];
+        long caseId = ids[1];
+        String taskId = examId + "_" + caseId;
+
         BugDTO masterReport = bugMap.get(masterId);
         List<DiffImg> diffImgs = new LinkedList<>();
-        cluster.forEach(bugId -> {
+        cluster.forEach(bugId -> { //遍历所有报告,找出每个聚类的差异图片
             if (!bugId.equals(masterId)) {
                 BugDTO report = bugMap.get(bugId);
                 String[] imgUrls = report.getImgUrls();
                 for (int i = 0; i < imgUrls.length; i++) {
-                    File imgFile = checkFileExist(genImagePath(bugId, i));
+                    File imgFile = checkFileExist(genImagePath(taskId, bugId, i));
                     if (imgFile == null) {
                         continue;
                     }
-                    boolean isSim = isSimilarWithMasterReport(masterReport, imgFile);
+                    boolean isSim = isSimilarWithMasterReport(masterReport, imgFile, taskId);
                     if (!isSim) {
                         diffImgs.add(new DiffImg(masterId, bugId, imgUrls[i], i));
                     }
@@ -92,12 +101,12 @@ public class DiffImgService {
         return diffImgs;
     }
 
-    private boolean isSimilarWithMasterReport(BugDTO masterReport, File imgFile) {
+    private boolean isSimilarWithMasterReport(BugDTO masterReport, File imgFile, String taskId) {
 
         String[] masterImgs = masterReport.getImgUrls();
         FingerPrint reportImgFingerPrint = readImgFingerPrint(imgFile);
         for (int i = 0; i < masterImgs.length; i++) {
-            String imgPath2 = genImagePath(masterReport.getId(), i);
+            String imgPath2 = genImagePath(taskId, masterReport.getId(), i);
             File img = checkFileExist(imgPath2);
             if (img == null) {
                 continue;

+ 5 - 5
src/main/java/com/mooctest/service/GraphService.java

@@ -43,7 +43,7 @@ public class GraphService {
             supNode.fluentPut("id", supId)
                     .fluentPut("group", 2)
                     .fluentPut("name", "补充点" + idx)
-                    .fluentPut("href", "/graphDetail/supReport?supId="+supId+"&masterId="+masterId);
+                    .fluentPut("href", "/graphDetail/supReport?supId=" + supId + "&masterId=" + masterId);
             supNodes.add(supNode);
 
             //add raw report node
@@ -59,7 +59,7 @@ public class GraphService {
                 rawNode.fluentPut("id", bugId)
                         .fluentPut("group", 3)
                         .fluentPut("name", "R" + bugId.substring(12))
-                        .fluentPut("href", "/graphDetail/rawReport?bugId="+bugId);
+                        .fluentPut("href", "/graphDetail/rawReport?bugId=" + bugId + "&masterId=" + masterId);
                 rawNodes.add(rawNode);
             }
             supNode.put("children", rawNodes);
@@ -127,7 +127,7 @@ public class GraphService {
             node.fluentPut("id", supId)
                     .fluentPut("group", 2)
                     .fluentPut("name", "Sup-" + idx)
-                    .fluentPut("href", "/graphDetail/supReport?supId="+supId+"&masterId="+masterId);
+                    .fluentPut("href", "/graphDetail/supReport?supId=" + supId + "&masterId=" + masterId);
             nodes.add(node);
 
             //add raw report node
@@ -142,7 +142,7 @@ public class GraphService {
                 rawNode.fluentPut("id", bugId)
                         .fluentPut("group", 6)
                         .fluentPut("name", "ML-" + bugId.substring(10))
-                        .fluentPut("href", "/graphDetail/rawReport?bugId="+bugId);
+                        .fluentPut("href", "/graphDetail/rawReport?bugId=" + bugId + "&masterId=" + masterId);
                 nodes.add(rawNode);
             }
             idx++;
@@ -158,7 +158,7 @@ public class GraphService {
         masterNode.fluentPut("id", masterId)
                 .fluentPut("group", 2.1)
                 .fluentPut("name", "Master-ML-" + masterId.substring(10))
-                .fluentPut("href", "/graphDetail/rawReport?bugId="+masterId+"&isMaster=true");
+                .fluentPut("href", "/graphDetail/rawReport?bugId=" + masterId + "&isMaster=true" + "&masterId=" + masterId);
         nodes.add(masterNode);
 
         return nodes;

+ 2 - 2
src/main/java/com/mooctest/service/MasterReportService.java

@@ -109,7 +109,7 @@ public class MasterReportService {
         long numOfAggReport = masterReportDao.countByExamIdAndCaseId(examId, caseId);
         return numOfAggReport > 0;
     }
-    public void deleteAll() {
-        masterReportDao.deleteAll();
+    /**
+     * Deletes the master reports of one task only, i.e. the rows matching the given exam and case.
+     * Despite the name, rows of other exam/case pairs are not touched.
+     *
+     * @param examId exam id of the task whose master reports are removed
+     * @param caseId case id of the task whose master reports are removed
+     */
+    public void deleteAll(long examId, long caseId) {
+        masterReportDao.deleteByExamIdAndCaseId(examId, caseId);
     }
 }

+ 18 - 7
src/main/java/com/mooctest/service/SupplementService.java

@@ -1,6 +1,5 @@
 package com.mooctest.service;
 
-import com.hankcs.hanlp.HanLP;
 import com.hankcs.hanlp.mining.word2vec.DocVectorModel;
 import com.hankcs.hanlp.utility.SentencesUtil;
 import com.mooctest.cluster.Group;
@@ -31,8 +30,10 @@ public class SupplementService {
     @Autowired
     SupplementDao supplementDao;
 
+    @Autowired
+    MasterReportService masterReportService;
+
     public void rankAndStoreDiffText(Map<String, List<Group<String, DiffText>>> masterDiffTextClustersMap) {
-        supplementDao.deleteAllInBatch();
         masterDiffTextClustersMap.forEach((masterId, diffTextClusters) -> {
 
             diffTextClusters.forEach(group -> {
@@ -124,8 +125,13 @@ public class SupplementService {
     }
 
     public double getSim(DiffImg diffImg1, DiffImg diffImg2) {
-        File img1 = new File(ImageUtil.genImagePath(diffImg1.getBugId(), diffImg1.getIndex()));
-        File img2 = new File(ImageUtil.genImagePath(diffImg2.getBugId(), diffImg2.getIndex()));
+        long[] ids = masterReportService.getExamIdAndCaseIdByMasterId(diffImg1.getMasterId());
+        long examId = ids[0];
+        long caseId = ids[1];
+        String taskId = examId + "_" + caseId;
+
+        File img1 = new File(ImageUtil.genImagePath(taskId, diffImg1.getBugId(), diffImg1.getIndex()));
+        File img2 = new File(ImageUtil.genImagePath(taskId, diffImg2.getBugId(), diffImg2.getIndex()));
         if (!img1.exists() || !img2.exists()) {
             return 0;
         }
@@ -143,7 +149,7 @@ public class SupplementService {
 
 
         supplementBugNumsMap.forEach((supId, num) -> {
-            SupplementDTO su = buileSupplement(supId, supplementMap, bugMap);
+            SupplementDTO su = buildSupplement(supId, supplementMap, bugMap);
             sortedSupplements.add(su);
         });
         return sortedSupplements;
@@ -153,7 +159,7 @@ public class SupplementService {
         return getSupplementByMasterId(masterId, null);
     }
 
-    private SupplementDTO buileSupplement(String supId,
+    private SupplementDTO buildSupplement(String supId,
                                           Map<String, List<SupplementItem>> supplementMap,
                                           Map<String, BugDTO> bugMap) {
 
@@ -275,7 +281,7 @@ public class SupplementService {
         List<SupplementItem> items = supplementDao.findBySupplementId(supId);
         Map<String, List<SupplementItem>> supplementMap = items.stream().collect(groupingBy(SupplementItem::getSupplementId));
 
-        return buileSupplement(supId, supplementMap, bugMap);
+        return buildSupplement(supId, supplementMap, bugMap);
     }
 
     public Map<String, List<String>> getMaster2BugIdsMap(List<String> masterIds) {
@@ -298,4 +304,9 @@ public class SupplementService {
         return masterBugIdsMap;
     }
 
+    /**
+     * Deletes the supplement items of one task (exam/case pair).
+     * <p>
+     * The items are located through the master report ids of the task, so this relies on the
+     * master reports still being present when it runs.
+     * NOTE(review): presumably callers must invoke this before deleting the master reports of
+     * the same task - confirm against the aggregation flow.
+     *
+     * @param examId exam id of the task
+     * @param caseId case id of the task
+     */
+    public void deleteAll(long examId, long caseId) {
+
+        List<String> masterIds = masterReportService.getAllMasterIdByExamIdAndCaseId(examId, caseId);
+        // No master reports for this task (e.g. it was never aggregated): nothing to delete.
+        // Skipping also avoids issuing a delete with an empty IN list, which some JPA
+        // provider / database combinations reject as invalid SQL.
+        if (masterIds == null || masterIds.isEmpty()) {
+            return;
+        }
+        supplementDao.deleteByMasterIdIn(masterIds);
+    }
 }

+ 5 - 0
src/main/java/com/mooctest/service/impl/BugReportServiceImpl.java

@@ -85,6 +85,11 @@ public class BugReportServiceImpl implements BugReportService {
 
     }
 
+    public BugDTO getBugById(String bugId, long examId, long caseId) {
+        return getAllBugsMap(examId, caseId).get(bugId);
+
+    }
+
     public Map<String, List<BugDTO>> getMasterBugMap(Map<String, List<String>> masterBugIdsMap, Map<String, BugDTO> bugsMap) {
 
         Map<String, List<BugDTO>> masterBugMap = new HashMap<>();

+ 2 - 2
src/main/java/com/mooctest/util/ImageUtil.java

@@ -34,7 +34,7 @@ public class ImageUtil {
         return fp;
     }
 
-    public static String genImagePath(String bugId, int idx) {
-        return IMAGE_PATH + bugId + "_" + idx;
+    /**
+     * Builds the path string of a report image: {@code IMAGE_PATH + taskId + "/" + bugId + "_" + idx}.
+     * Images are therefore grouped in one sub-directory per task.
+     *
+     * @param taskId task identifier used as the sub-directory name
+     *               (callers in this change pass {@code examId + "_" + caseId})
+     * @param bugId  id of the bug report the image belongs to
+     * @param idx    index of the image within the report's image list
+     * @return the concatenated path string; the file is not checked for existence here
+     */
+    public static String genImagePath(String taskId, String bugId, int idx) {
+        return IMAGE_PATH + taskId + "/" + bugId + "_" + idx;
     }
 }

+ 54 - 44
src/main/resources/static/js/app_info.js

@@ -18,58 +18,68 @@ $(function () {
         $(this).find('span').text('融合中...');
 
 		$.get('/aggregate', {examId: examId, caseId: caseId}, function () {
+			setInterval(function () {
+                $.get('/agg_task_status', {examId: examId, caseId: caseId}, function(result) {
+                	if (result == 1) {
 
-			debugger
-			var rand = Math.round(Math.random() * 100);
-			var count = $('#report-list tbody tr').length;
+                    	agg_success();
+					}
+				});
+			}, 10000)
+        });
+	});
 
-			$('#report-list tbody tr').each(function (i, el) {
-				setTimeout(function () {
-					rand = Math.round(Math.random() * 100);
+});
 
-					// 在此条记录最后添加loading的图标
-					$(el).find('td:last').after('<td><i class=\'fa fa-spinner\'></i></td>');
+function agg_success() {
 
-					// get value
-					var _id = $(el).children('td.report-id').attr('value');
+    var rand = Math.round(Math.random() * 100);
+    var count = $('#report-list tbody tr').length;
 
-					$.get('/aggregate_info', { bugId: _id }, function(data) {
-						var aggregator = data.masterId;
-						var printAggregator = 'ML-AG-' + data.masterId.substring(10);
+    $('#report-list tbody tr').each(function (i, el) {
+        setTimeout(function () {
+            rand = Math.round(Math.random() * 100);
 
-						function fadeInAggreagator() {
-							// $(el).children('td.report-aggregator').hide();
-							if($.trim(aggregator)) {//找到了重复报告
-								$(el).children('td.report-aggregator').attr('value', aggregator);
-								var aggregatorHtml = "<a href=/report?masterId="+aggregator+"&examId="+examId+"&caseId="+caseId+">"+printAggregator+"</a>";
-								$(el).children('td.report-aggregator').html(aggregatorHtml).fadeIn(rand * 2);
-							}
-						}
+            // 在此条记录最后添加loading的图标
+            $(el).find('td:last').after('<td><i class=\'fa fa-spinner\'></i></td>');
 
-						function fadeInStatus() {
-							$(el).find('td:last').hide();
-							// 如果data.assignee 不为None,则最后添加对勾图标,否则添加叉叉图标
-							if($.trim(aggregator)) {//找到了负责人
-								$(el).find('td:last').html('<i class=\'fa fa-check-circle text-inverse\'></i>').fadeIn(rand * 2);
-								$(el).find('td:last').addClass('row-success');
+            // get value
+            var _id = $(el).children('td.report-id').attr('value');
 
-							} else {
-								$(el).find('td:last').html('<i class=\'fa fa-history text-inverse\'></i>').fadeIn(rand * 2);
-								$(el).find('td:last').addClass('row-danger');
-							}
-						}
+            $.get('/aggregate_info', { bugId: _id }, function(data) {
+                var aggregator = data.masterId;
+                var printAggregator = 'ML-AG-' + data.masterId.substring(10);
 
-						setTimeout(fadeInAggreagator, 0);
-						setTimeout(fadeInStatus, rand);
-					});
-				}, rand * 3 *i);
-			});
+                function fadeInAggreagator() {
+                    // $(el).children('td.report-aggregator').hide();
+                    if($.trim(aggregator)) {//找到了重复报告
+                        $(el).children('td.report-aggregator').attr('value', aggregator);
+                        var aggregatorHtml = "<a href=/report?masterId="+aggregator+"&examId="+examId+"&caseId="+caseId+">"+printAggregator+"</a>";
+                        $(el).children('td.report-aggregator').html(aggregatorHtml).fadeIn(rand * 2);
+                    }
+                }
 
-			//假设此time之后所有assign操作均已完成
-			setTimeout(function () {
-				location.reload();
-			}, rand * 3 * (count+1));
-        });
-	});
+                function fadeInStatus() {
+                    $(el).find('td:last').hide();
+                    // 如果data.assignee 不为None,则最后添加对勾图标,否则添加叉叉图标
+                    if($.trim(aggregator)) {//找到了负责人
+                        $(el).find('td:last').html('<i class=\'fa fa-check-circle text-inverse\'></i>').fadeIn(rand * 2);
+                        $(el).find('td:last').addClass('row-success');
+
+                    } else {
+                        $(el).find('td:last').html('<i class=\'fa fa-history text-inverse\'></i>').fadeIn(rand * 2);
+                        $(el).find('td:last').addClass('row-danger');
+                    }
+                }
+
+                setTimeout(fadeInAggreagator, 0);
+                setTimeout(fadeInStatus, rand);
+            });
+        }, rand * 3 *i);
+    });
 
-});
+    //假设此time之后所有assign操作均已完成
+    setTimeout(function () {
+        location.reload();
+    }, rand * 3 * (count+1));
+};