package com.mooctest.nlp;

import com.hankcs.hanlp.mining.word2vec.DocVectorModel;
import com.mooctest.image.FingerPrint;
import com.mooctest.data.BugDTO;
import com.mooctest.data.DiffImg;
import com.mooctest.util.Doc2VecUtil;
import com.mooctest.util.ImageUtil;

import java.io.File;
import java.util.List;

import static com.mooctest.util.ImageUtil.IMAGE_PATH;
import static com.mooctest.util.ImageUtil.checkFileExist;

/**
 * Builds pairwise distance matrices over bug reports and their images.
 *
 * <p>Every matrix returned here is a jagged lower-triangular array: row {@code i} has
 * {@code i + 1} entries, only the entries {@code [i][j]} with {@code j < i} are written,
 * and the diagonal entry {@code [i][i]} keeps its default value of {@code 0.0}.
 */
public class DistanceMatrix {

    /** Image distance at or below which two bugs are treated as sharing a similar image. */
    private static final double IMG_DIST_THRESHOLD = 0.15;

    /** Factor applied to the text distance when the image distance is within the threshold. */
    private static final double SIMILAR_IMG_DISCOUNT = 0.9;

    /**
     * Computes the text distance between every pair of bug descriptions.
     *
     * @param bugDTOs the bugs to compare
     * @return lower-triangular matrix whose entry {@code [i][j]} ({@code j < i}) is
     *         {@code 1 - similarity} of the descriptions of bugs {@code i} and {@code j}
     */
    public static double[][] genWord2VecDist(List<BugDTO> bugDTOs) {
        int n = bugDTOs.size();
        double[][] proximity = new double[n][];
        DocVectorModel docVectorModel = Doc2VecUtil.loadModel();
        for (int i = 0; i < n; i++) {
            proximity[i] = new double[i + 1];
            BugDTO current = bugDTOs.get(i);
            for (int j = 0; j < i; j++) {
                proximity[i][j] = descriptionDistance(docVectorModel, current, bugDTOs.get(j));
            }
        }
        return proximity;
    }

    /**
     * Computes the fingerprint distance between every pair of images.
     *
     * <p>The image for list position {@code k} is looked up at
     * {@code IMAGE_PATH + bugId + "_" + k}.
     * NOTE(review): the suffix is the position in {@code diffImgs}, not a per-bug image
     * index - confirm this matches how the files are named on disk.
     *
     * <p>When {@code checkFileExist} returns {@code null} for either image, the pair is
     * skipped and its entry stays {@code 0.0}.
     * NOTE(review): a skipped pair is therefore indistinguishable from a pair at distance
     * zero - confirm that callers expect this.
     *
     * @param diffImgs the images to compare
     * @return lower-triangular matrix whose entry {@code [i][j]} ({@code j < i}) is
     *         {@code 1 - similarity} of the two image fingerprints
     */
    public static double[][] genImgDist(List<DiffImg> diffImgs) {
        int n = diffImgs.size();
        double[][] distMatrix = new double[n][];
        for (int i = 0; i < n; i++) {
            distMatrix[i] = new double[i + 1];
            DiffImg diffImg1 = diffImgs.get(i);
            for (int j = 0; j < i; j++) {
                DiffImg diffImg2 = diffImgs.get(j);
                File file1 = checkFileExist(IMAGE_PATH + diffImg1.getBugId() + "_" + i);
                File file2 = checkFileExist(IMAGE_PATH + diffImg2.getBugId() + "_" + j);
                if (file1 == null || file2 == null) {
                    continue;
                }
                FingerPrint fp1 = ImageUtil.readImgFingerPrint(file1);
                FingerPrint fp2 = ImageUtil.readImgFingerPrint(file2);
                float sim = fp1.compare(fp2);
                distMatrix[i][j] = 1 - sim;
            }
        }
        return distMatrix;
    }

    /**
     * Computes a text distance that is reduced when two bugs also have similar images.
     *
     * <p>Each entry starts as the description distance. If both bugs have at least one
     * image URL and their image distance is at most {@code IMG_DIST_THRESHOLD}, the text
     * distance is multiplied by {@code SIMILAR_IMG_DISCOUNT}.
     *
     * @param bugDTOs the bugs to compare
     * @return lower-triangular matrix of the (possibly discounted) text distances
     */
    public static double[][] genHybridDist(List<BugDTO> bugDTOs) {
        int n = bugDTOs.size();
        double[][] proximity = new double[n][];
        DocVectorModel docVectorModel = Doc2VecUtil.loadModel();
        for (int i = 0; i < n; i++) {
            proximity[i] = new double[i + 1];
            BugDTO bug1 = bugDTOs.get(i);
            for (int j = 0; j < i; j++) {
                BugDTO bug2 = bugDTOs.get(j);
                proximity[i][j] = descriptionDistance(docVectorModel, bug1, bug2);
                if (checkBugImageNotNull(bug1) && checkBugImageNotNull(bug2)) {
                    double imgDist = calBugImgDist(bug1, bug2);
                    if (imgDist <= IMG_DIST_THRESHOLD) {
                        proximity[i][j] = proximity[i][j] * SIMILAR_IMG_DISCOUNT;
                    }
                }
            }
        }
        return proximity;
    }

    /** Returns {@code 1 - similarity} of the two bugs' descriptions under the given model. */
    private static double descriptionDistance(DocVectorModel model, BugDTO bug1, BugDTO bug2) {
        return 1 - model.similarity(bug1.getDescription(), bug2.getDescription());
    }

    /**
     * Computes the image distance between two bugs as one minus the highest fingerprint
     * similarity found over all pairs (image of {@code bug1}, image of {@code bug2}).
     *
     * <p>Image {@code k} of a bug is looked up at {@code IMAGE_PATH + id + "_" + k}. Pairs
     * for which {@code checkFileExist} returns {@code null} on either side are skipped; if
     * no pair is compared, the result is {@code 1}.
     *
     * @param bug1 first bug; its image URL array must be non-null
     * @param bug2 second bug; its image URL array must be non-null
     * @return {@code 1 - maxSimilarity}
     */
    private static double calBugImgDist(BugDTO bug1, BugDTO bug2) {
        float maxSim = 0;
        for (int i = 0; i < bug1.getImgUrls().length; i++) {
            for (int j = 0; j < bug2.getImgUrls().length; j++) {
                File file1 = checkFileExist(IMAGE_PATH + bug1.getId() + "_" + i);
                // The second bug's image is selected by the inner index j. Using i here
                // would compare only same-index images and repeat that comparison for
                // every j.
                File file2 = checkFileExist(IMAGE_PATH + bug2.getId() + "_" + j);
                if (file1 == null || file2 == null) {
                    continue;
                }
                FingerPrint fp1 = ImageUtil.readImgFingerPrint(file1);
                FingerPrint fp2 = ImageUtil.readImgFingerPrint(file2);
                float sim = fp1.compare(fp2);
                maxSim = sim > maxSim ? sim : maxSim;
            }
        }
        return 1 - maxSim;
    }

    /** Returns {@code true} when the bug has a non-null, non-empty image URL array. */
    private static boolean checkBugImageNotNull(BugDTO bug) {
        return ((bug.getImgUrls() != null) && bug.getImgUrls().length > 0);
    }
}