DistanceMatrix.java 3.8 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104
  1. package com.mooctest.nlp;
  2. import com.hankcs.hanlp.mining.word2vec.DocVectorModel;
  3. import com.mooctest.image.FingerPrint;
  4. import com.mooctest.data.BugDTO;
  5. import com.mooctest.data.DiffImg;
  6. import com.mooctest.util.Doc2VecUtil;
  7. import com.mooctest.util.ImageUtil;
  8. import java.io.File;
  9. import java.util.List;
  10. import static com.mooctest.util.ImageUtil.IMAGE_PATH;
  11. import static com.mooctest.util.ImageUtil.checkFileExist;
  12. public class DistanceMatrix {
  13. public static double[][] genWord2VecDist(List<BugDTO> bugDTOs) {
  14. int n = bugDTOs.size();
  15. double[][] proximity = new double[n][];
  16. DocVectorModel docVectorModel = Doc2VecUtil.loadModel();
  17. for (int i = 0; i < n; i++) {
  18. proximity[i] = new double[i + 1];
  19. for (int j = 0; j < i; j++) {
  20. proximity[i][j] = 1 - docVectorModel.similarity(bugDTOs.get(i).getDescription(), bugDTOs.get(j).getDescription());
  21. }
  22. }
  23. return proximity;
  24. }
  25. public static double[][] genImgDist(List<DiffImg> diffImgs) {
  26. int n = diffImgs.size();
  27. double[][] distMatrix = new double[n][];
  28. for (int i = 0; i < n; i++) {
  29. distMatrix[i] = new double[i + 1];
  30. for (int j = 0; j < i; j++) {
  31. DiffImg diffImg1 = diffImgs.get(i);
  32. DiffImg diffImg2 = diffImgs.get(j);
  33. File file1 = checkFileExist(IMAGE_PATH + diffImg1.getBugId() + "_" + i);
  34. File file2 = checkFileExist(IMAGE_PATH + diffImg2.getBugId() + "_" + j);
  35. if (file1 == null || file2 == null) {
  36. continue;
  37. }
  38. FingerPrint fp1 = ImageUtil.readImgFingerPrint(file1);
  39. FingerPrint fp2 = ImageUtil.readImgFingerPrint(file2);
  40. float sim = fp1.compare(fp2);
  41. distMatrix[i][j] = 1 - sim;
  42. }
  43. }
  44. return distMatrix;
  45. }
  46. public static double[][] genHybridDist(List<BugDTO> bugDTOs) {
  47. int n = bugDTOs.size();
  48. double[][] proximity = new double[n][];
  49. DocVectorModel docVectorModel = Doc2VecUtil.loadModel();
  50. for (int i = 0; i < n; i++) {
  51. proximity[i] = new double[i + 1];
  52. for (int j = 0; j < i; j++) {
  53. double txtDist = 1 - docVectorModel.similarity(bugDTOs.get(i).getDescription(), bugDTOs.get(j).getDescription());
  54. proximity[i][j] = txtDist;
  55. if (checkBugImageNotNull(bugDTOs.get(i)) && checkBugImageNotNull(bugDTOs.get(j))) {
  56. double imgDist = calBugImgDist(bugDTOs.get(i), bugDTOs.get(j));
  57. if (imgDist <= 0.15) {
  58. proximity[i][j] = proximity[i][j] * 0.9;
  59. }
  60. }
  61. }
  62. }
  63. return proximity;
  64. }
  65. private static double calBugImgDist(BugDTO bug1, BugDTO bug2) {
  66. float max_sim = 0;
  67. for (int i = 0; i < bug1.getImgUrls().length; i++) {
  68. for (int j = 0; j < bug2.getImgUrls().length; j++) {
  69. File file1 = checkFileExist(IMAGE_PATH + bug1.getId() + "_" + i);
  70. File file2 = checkFileExist(IMAGE_PATH + bug2.getId() + "_" + i);
  71. if (file1 == null || file2 == null) {
  72. continue;
  73. }
  74. FingerPrint fp1 = ImageUtil.readImgFingerPrint(file1);
  75. FingerPrint fp2 = ImageUtil.readImgFingerPrint(file2);
  76. float sim = fp1.compare(fp2);
  77. max_sim = sim > max_sim ? sim : max_sim;
  78. }
  79. }
  80. return 1 - max_sim;
  81. }
  82. private static boolean checkBugImageNotNull(BugDTO bug) {
  83. return ((bug.getImgUrls() != null) && bug.getImgUrls().length > 0);
  84. }
  85. }