# get_unique_inconsistency.py (8.5 KB)
  1. """
  2. # Part of localization phase
  3. """
  4. import os
  5. import sys
  6. import math
  7. from itertools import combinations
  8. import configparser
  9. import pickle
  10. import warnings
  11. from datetime import datetime
  12. warnings.filterwarnings("ignore")
  13. os.environ["TF_CPP_MIN_LOG_LEVEL"] = '2' # 只显示 warning 和 Error
  14. os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"
  15. os.environ["CUDA_VISIBLE_DEVICES"] = "0"
  16. start_time = datetime.now()
  17. distance_threshold = 0.005
  18. def get_all_inputs():
  19. """
  20. divide inconsistencies into different backends pair
  21. """
  22. ultimate_localization_inconststency = {exp: {bk_pr: list() for bk_pr in backend_pairs} for exp in exps}
  23. print(ultimate_localization_inconststency)
  24. for exp in exps:
  25. exp_dir = os.path.join(output_dir, exp)
  26. metrics_dir = os.path.join(exp_dir, "metrics_result")
  27. exp_metrics_path = os.path.join(metrics_dir, "{}_D_MAD_result.csv".format(exp))
  28. metrics_result = {}
  29. with open(exp_metrics_path, "r") as fr:
  30. lines = fr.readlines()[1:]
  31. for line in lines:
  32. line_split = line.split(",")
  33. # incon_idntfr like mobilenet.1.00.224-imagenet_origin0_theano_cntk_input1494
  34. incon_idntfr, incon_value = line_split[0], float(line_split[1])
  35. metrics_result[incon_idntfr] = incon_value
  36. for incon_idntfr in metrics_result.keys():
  37. incon_idntfr_splits = incon_idntfr.split("_")
  38. bks_pair = "{}_{}".format(incon_idntfr_splits[2], incon_idntfr_splits[3])
  39. if bks_pair in backend_pairs:
  40. incon_tuple = (incon_idntfr, metrics_result[incon_idntfr])
  41. ultimate_localization_inconststency[exp][bks_pair].append(incon_tuple)
  42. return ultimate_localization_inconststency
  43. def set_calculation(incons: list):
  44. origin_incons = dict()
  45. mutated_incons = dict()
  46. for incon in incons:
  47. # incon_idntfr like mobilenet.1.00.224-imagenet_origin0_theano_cntk_input1494
  48. incon_idntfr, incon_value = incon[0].replace("\n", ""), incon[1]
  49. incon_tuple = (incon_idntfr, incon_value)
  50. if not math.isnan(incon_value) and incon_value >= threshold:
  51. incon_idntfr_splits = incon_idntfr.split("_")
  52. # input_key = incon_idntfr_splits[-1] #input1494
  53. input_key = f"{incon_idntfr_splits[0]}_{incon_idntfr_splits[-1]}" # lenet5-mnist_input1494
  54. if incon_idntfr_splits[1] == "origin0":
  55. origin_incons = add_into_dict(input_key, incon_tuple, origin_incons)
  56. else:
  57. mutated_incons = add_into_dict(input_key, incon_tuple, mutated_incons)
  58. mutated_greater = list()
  59. origin_greater = list()
  60. """mutated higher"""
  61. for ik, t in mutated_incons.items():
  62. if ik not in origin_incons.keys():
  63. mutated_greater.append(t)
  64. """origin higher"""
  65. for ik, t in origin_incons.items():
  66. if ik not in mutated_incons.keys():
  67. origin_greater.append(t)
  68. return list(origin_incons.values()), list(mutated_incons.values()), origin_greater, mutated_greater
  69. def add_into_dict(input_key, incon_tuple, incon_dict):
  70. """
  71. Two step:
  72. 0. under the same backends pair
  73. * 1. the same input, choose largest.
  74. 2. different inputs with small distance. Do not update
  75. """
  76. if input_key not in incon_dict.keys() or incon_dict[input_key][1] < incon_tuple[1]:
  77. incon_dict[input_key] = incon_tuple
  78. return incon_dict
  79. def close_incons_reduction(incons: list):
  80. """
  81. Two step:
  82. 0. under the same backends pair
  83. 1. the same input, choose largest.(done before)
  84. * 2. different inputs with small distance. Do not update(not used)
  85. """
  86. def is_duplicate(t: tuple, li: list):
  87. """unique inconsistency"""
  88. for l in li:
  89. if abs(t[1] - l[1]) <= distance_threshold:
  90. return True,l
  91. return False,None
  92. result = list()
  93. relation_dict = dict()
  94. for incon in incons:
  95. status, l = is_duplicate(incon, result)
  96. if not status:
  97. result.append(incon)
  98. else:
  99. relation_dict[incon] = l
  100. return result,relation_dict
  101. def get_diff_set(a_list, b_list):
  102. """Get results of a - b"""
  103. a_dict = {tpl[0]: tpl for tpl in a_list}
  104. b_dict = {tpl[0]: tpl for tpl in b_list}
  105. result_set = list()
  106. for ik, t in a_dict.items():
  107. if ik not in b_dict.keys():
  108. result_set.append(t)
  109. return set(result_set)
  110. def update_localize_model_inputs(exp, idntfrs: list, localizes: dict):
  111. for idntfr in idntfrs:
  112. # idntfr like mobilenet.1.00.224-imagenet_origin0_theano_cntk_input1494
  113. idntfr_splits = idntfr.split("_")
  114. model_input = "_".join([idntfr_splits[0], idntfr_splits[1], idntfr_splits[-1]])
  115. localizes[exp].add(model_input)
  116. return localizes
if __name__ == '__main__':
    # Entry point: for each trio of DL backends, collect per-pair unique
    # inconsistencies and the model inputs to localize, then pickle both.
    # NOTE(review): indentation below was reconstructed from a flattened
    # source; loop-nesting boundaries should be confirmed against the
    # original repository.

    # get experiments configuration (config file name from argv)
    config_name = sys.argv[1]
    lemon_cfg = configparser.ConfigParser()
    lemon_cfg.read(f"./config/{config_name}")
    parameters = lemon_cfg['parameters']
    output_dir = parameters['output_dir']
    output_dir = output_dir[:-1] if output_dir.endswith("/") else output_dir
    threshold = parameters.getfloat('threshold')
    current_container = output_dir.rstrip("/").split("/")[-1]
    backend_choices = [1,2,3]
    # you can try different threshold
    unique_inconsistencies_dict = set()
    total_model_inputs = dict()
    all_relations = dict()
    for backend_choice in backend_choices:
        # Each choice selects a trio of backends compared pairwise.
        if backend_choice == 1:
            backends = ['tensorflow', 'theano', 'cntk']
        elif backend_choice == 2:
            backends = ['tensorflow', 'theano', 'mxnet']
        else:
            backends = ['tensorflow', 'cntk', 'mxnet']
        print(current_container,backends)
        backends_str = "-".join(backends)
        backend_pairs = [f"{pair[0]}_{pair[1]}" for pair in combinations(backends, 2)]
        """Get all exps"""
        exps = parameters['exps'].lstrip().rstrip().split(" ")
        exps.sort(key=lambda x: x)
        compare_columns = ['M-O', 'O-M', 'O&M']
        localize_model_inputs = {exp: set() for exp in exps}
        unique_incons = dict()
        exp_analysis = {exp: {bkpair: list() for bkpair in backend_pairs} for exp in exps}
        """Generate unique inconsistency"""
        exp_inputs_dict = get_all_inputs()
        for exp_id, backends_incons in exp_inputs_dict.items():
            print("######{}######".format(exp_id))
            exp_dict = dict()
            for bk_pair, incons in backends_incons.items():
                print("------{}------".format(bk_pair))
                # a list of tuples. like(incon_idntfr,incon_value)
                origin_incons, mutated_incons, _, _ = set_calculation(incons)
                # Deduplicate near-equal values within each side.
                origin_set,_ = close_incons_reduction(origin_incons)
                mutated_set,_ = close_incons_reduction(mutated_incons)
                # Tag each surviving tuple with its backend pair and
                # whether it came from an origin ('O') or mutant ('M') model.
                for incon in origin_set:
                    unique_inconsistencies_dict.add((bk_pair, 'O', incon))
                for incon in mutated_set:
                    unique_inconsistencies_dict.add((bk_pair, 'M', incon))
                localize_model_inputs = update_localize_model_inputs(exp_id, [t[0] for t in origin_set],
                                                                     localize_model_inputs)
                localize_model_inputs = update_localize_model_inputs(exp_id, [t[0] for t in mutated_set],
                                                                     localize_model_inputs)
        """print model_inputs to localize"""
        print("\n########Localize model_inputs##########")
        # check how many model input has been localized
        non_localized_cnt = localized_cnt = 0
        with open(os.path.join(output_dir, f"localize_model_inputs-{backends_str}.pkl"), "wb") as fw:
            pickle.dump(localize_model_inputs, fw)
        # Merge this trio's model inputs into the cross-trio totals.
        for exp_id, model_set in localize_model_inputs.items():
            if exp_id not in total_model_inputs.keys():
                total_model_inputs[exp_id] = set()
            for mi in model_set:
                total_model_inputs[exp_id].add(mi)
    # Summary: per experiment, count origin-model vs mutant-model inputs.
    for exp_id,mis in total_model_inputs.items():
        O_num,M_num = 0,0
        for mi in mis:
            if mi.split("_")[1] == 'origin0':
                O_num += 1
            else:
                M_num +=1
        print(f"{exp_id}: {len(mis)} O:{O_num} M:{M_num}")
    with open(os.path.join(output_dir, f"unique_inconsistencies.pkl"), "wb") as fw:
        pickle.dump(unique_inconsistencies_dict, fw)
    end_time = datetime.now()
    print("Time cost:",end_time-start_time)