data_preprocessing_utils_1.py

import os
import shutil

import h5py
import imageio
import numpy as np
import scipy.io as sio


def convert_MAP(directory, output_directory, min_shape, file_format='.npy', search_keys=None, dtype=np.float32):
    '''
    Loop through the given raw-data directory and convert each .mat file
    into .npy files (or another image format) inside output_directory.
    '''
    # Create a fresh output directory, removing any previous contents.
    new_dir = os.path.join(os.getcwd(), output_directory)
    if not os.path.exists(new_dir):
        os.mkdir(new_dir)
    else:
        shutil.rmtree(new_dir)
        os.mkdir(new_dir)
    for file in os.listdir(directory):
        filename = os.fsdecode(file)
        if filename.endswith(".mat"):
            filepath = os.path.join(directory, filename)
            array_dict = {}
            # MATLAB v7.3 files are HDF5, so try h5py first; older .mat
            # versions fall back to scipy.io.loadmat.
            try:
                f = h5py.File(filepath, 'r')
            except OSError:
                f = sio.loadmat(filepath)
            for k, v in f.items():
                array_dict[k] = np.array(v, dtype=dtype)
            # We only need the image data from the dict, so filter its keys.
            if search_keys is None:
                # Out of the .mat struct we want the "map" entries.
                filtered_dict = dict(filter(lambda item: 'map' in item[0], array_dict.items()))
            else:
                filtered_dict = {}
                for search_key in search_keys:
                    if search_key in array_dict:
                        filtered_dict[search_key] = array_dict[search_key]
            if len(filtered_dict) == 0:
                print('No Data to Meet Search Key Requirements: Datapoint Rejected -> ' + filepath)
            else:
                arrays = []
                for k, v in filtered_dict.items():
                    temp = np.transpose(v.astype(np.float32))
                    # To normalize data between [-1, 1], use -> temp = temp/(np.max(temp)/2) - 1
                    # To normalize data between [0, 255],
                    # use -> temp = (temp/np.max(temp)*255).astype(np.uint8)
                    # Normalize data between [0, 1]:
                    temp = temp / np.max(temp)
                    arrays.append(temp)
                # Collapse any 3D volumes to 2D by averaging over the last axis.
                for i in range(len(arrays)):
                    if len(arrays[i].shape) > 2:
                        arrays[i] = np.mean(arrays[i], axis=2)
                for i in range(len(arrays)):
                    new_dir_filepath = os.path.join(new_dir, os.path.splitext(filename)[0]
                                                    + '_index' + str(i) + file_format)
                    array = arrays[i]
                    if array.shape[0] >= min_shape[0] and array.shape[1] >= min_shape[1]:
                        if file_format == '.npy':
                            np.save(new_dir_filepath, array, allow_pickle=True, fix_imports=True)
                        else:
                            # For image formats, imageio converts the float data on write;
                            # an integer dtype (see the [0, 255] note above) is usually preferred.
                            imageio.imwrite(new_dir_filepath, array)
                    elif i == 0:
                        print('Min Size Not Met: Datapoint Rejected -> ' + filepath)
    return os.path.join(os.getcwd(), output_directory)
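
# Minimal usage sketch (the 'raw_data'/'processed_data' directory names and the
# (64, 64) minimum shape are illustrative assumptions, not values from the original
# project): convert every .mat file in 'raw_data/' into .npy files in 'processed_data/',
# keeping only arrays whose key contains 'map' and whose first two dims are >= 64.
if __name__ == '__main__':
    processed_dir = convert_MAP('raw_data', 'processed_data', min_shape=(64, 64))
    print('Converted arrays written to: ' + processed_dir)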
##################################################################################################################################
# Data Cleaning Procedures: