123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104 |
- package com.mooctest.nlp;
- import com.hankcs.hanlp.mining.word2vec.DocVectorModel;
- import com.mooctest.image.FingerPrint;
- import com.mooctest.data.BugDTO;
- import com.mooctest.data.DiffImg;
- import com.mooctest.util.Doc2VecUtil;
- import com.mooctest.util.ImageUtil;
- import java.io.File;
- import java.util.List;
- import static com.mooctest.util.ImageUtil.IMAGE_PATH;
- import static com.mooctest.util.ImageUtil.checkFileExist;
- public class DistanceMatrix {
- public static double[][] genWord2VecDist(List<BugDTO> bugDTOs) {
- int n = bugDTOs.size();
- double[][] proximity = new double[n][];
- DocVectorModel docVectorModel = Doc2VecUtil.loadModel();
- for (int i = 0; i < n; i++) {
- proximity[i] = new double[i + 1];
- for (int j = 0; j < i; j++) {
- proximity[i][j] = 1 - docVectorModel.similarity(bugDTOs.get(i).getDescription(), bugDTOs.get(j).getDescription());
- }
- }
- return proximity;
- }
- public static double[][] genImgDist(List<DiffImg> diffImgs) {
- int n = diffImgs.size();
- double[][] distMatrix = new double[n][];
- for (int i = 0; i < n; i++) {
- distMatrix[i] = new double[i + 1];
- for (int j = 0; j < i; j++) {
- DiffImg diffImg1 = diffImgs.get(i);
- DiffImg diffImg2 = diffImgs.get(j);
- File file1 = checkFileExist(IMAGE_PATH + diffImg1.getBugId() + "_" + i);
- File file2 = checkFileExist(IMAGE_PATH + diffImg2.getBugId() + "_" + j);
- if (file1 == null || file2 == null) {
- continue;
- }
- FingerPrint fp1 = ImageUtil.readImgFingerPrint(file1);
- FingerPrint fp2 = ImageUtil.readImgFingerPrint(file2);
- float sim = fp1.compare(fp2);
- distMatrix[i][j] = 1 - sim;
- }
- }
- return distMatrix;
- }
- public static double[][] genHybridDist(List<BugDTO> bugDTOs) {
- int n = bugDTOs.size();
- double[][] proximity = new double[n][];
- DocVectorModel docVectorModel = Doc2VecUtil.loadModel();
- for (int i = 0; i < n; i++) {
- proximity[i] = new double[i + 1];
- for (int j = 0; j < i; j++) {
- double txtDist = 1 - docVectorModel.similarity(bugDTOs.get(i).getDescription(), bugDTOs.get(j).getDescription());
- proximity[i][j] = txtDist;
- if (checkBugImageNotNull(bugDTOs.get(i)) && checkBugImageNotNull(bugDTOs.get(j))) {
- double imgDist = calBugImgDist(bugDTOs.get(i), bugDTOs.get(j));
- if (imgDist <= 0.15) {
- proximity[i][j] = proximity[i][j] * 0.9;
- }
- }
- }
- }
- return proximity;
- }
- private static double calBugImgDist(BugDTO bug1, BugDTO bug2) {
- float max_sim = 0;
- for (int i = 0; i < bug1.getImgUrls().length; i++) {
- for (int j = 0; j < bug2.getImgUrls().length; j++) {
- File file1 = checkFileExist(IMAGE_PATH + bug1.getId() + "_" + i);
- File file2 = checkFileExist(IMAGE_PATH + bug2.getId() + "_" + i);
- if (file1 == null || file2 == null) {
- continue;
- }
- FingerPrint fp1 = ImageUtil.readImgFingerPrint(file1);
- FingerPrint fp2 = ImageUtil.readImgFingerPrint(file2);
- float sim = fp1.compare(fp2);
- max_sim = sim > max_sim ? sim : max_sim;
- }
- }
- return 1 - max_sim;
- }
- private static boolean checkBugImageNotNull(BugDTO bug) {
- return ((bug.getImgUrls() != null) && bug.getImgUrls().length > 0);
- }
- }
|