model2json.py 19 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438
  1. import keras
  2. import json
  3. import os
  4. from itertools import product
  5. import sys
  6. sys.path.append("../")
  7. # from scripts.coverage import custom_objects
  8. import tensorflow as tf
  9. import psutil
  10. import scripts.tools.utils as utils_tools
  11. # folder_path = 'model_json'
  12. # api_config_pool_path = 'api_config_pool.json'
  13. PARAMETER_SPACE = 5
  14. import configparser
  15. def custom_objects():
  16. def no_activation(x):
  17. return x
  18. def leakyrelu(x):
  19. import keras.backend as K
  20. return K.relu(x, alpha=0.01)
  21. objects = {}
  22. objects['no_activation'] = no_activation
  23. objects['leakyrelu'] = leakyrelu
  24. return objects
  25. def extract_edges(model):
  26. layer_list = model.layers
  27. existing_edges = []
  28. for layer in layer_list:
  29. start_layer_class = layer.__class__.__name__
  30. if "Input" in start_layer_class:
  31. continue
  32. for node in layer._outbound_nodes:
  33. end_layer_class = node.outbound_layer.__class__.__name__
  34. edge = (start_layer_class, end_layer_class) # edge should be direct
  35. if edge not in existing_edges:
  36. existing_edges.append(edge)
  37. return existing_edges
  38. def extract_nodes(model):
  39. """
  40. existing_nodes: {"layer_name1": [layer_config1, layer_config2], "layer_name2": [], ...}
  41. """
  42. layer_list = model.layers
  43. existing_nodes = {}
  44. for layer in layer_list:
  45. layer_config = layer.get_config()
  46. layer_config.pop("name")
  47. if "filters" in layer_config: layer_config.pop("filters")
  48. if "units" in layer_config: layer_config.pop("units")
  49. layer_class = layer.__class__.__name__
  50. if 'Input' in layer_class:
  51. continue
  52. if layer_class not in existing_nodes:
  53. existing_nodes[layer_class] = []
  54. if layer_config not in existing_nodes[layer_class]:
  55. existing_nodes[layer_class].append(layer_config)
  56. return existing_nodes
  57. def extract_inputs(model):
  58. """
  59. existing_inputs: {"layer_class": {"input_dims": [], "dtype": [], "shape": []}}
  60. layer_dims: {"layer_class": {"input_dims": [], "output_dims": []}}
  61. """
  62. # if model.__class__.__name__ == 'Sequential':
  63. # layer_list = model.layers
  64. # else:
  65. # layer_list = model.layers[1:] # ignore the first input layer
  66. layer_list = model.layers
  67. existing_inputs = {}
  68. layer_dims = {}
  69. for layer in layer_list:
  70. layer_class = layer.__class__.__name__
  71. if 'Input' in layer_class:
  72. continue
  73. if layer_class not in existing_inputs:
  74. existing_inputs[layer_class] = {"input_dims": [], "dtype": [], "shape": []}
  75. layer_dims[layer_class] = {"input_dims": [], "output_dims": []}
  76. input_dims = len(layer.input.shape)
  77. output_dims = len(layer.output.shape)
  78. dtype = str(layer.input.dtype.name)
  79. shape = str(list(layer.input.shape))
  80. if input_dims not in existing_inputs[layer_class]['input_dims']:
  81. existing_inputs[layer_class]['input_dims'].append(input_dims)
  82. if input_dims not in layer_dims[layer_class]['input_dims']:
  83. layer_dims[layer_class]['input_dims'].append(input_dims)
  84. if output_dims not in layer_dims[layer_class]['output_dims']:
  85. layer_dims[layer_class]['output_dims'].append(output_dims)
  86. if dtype not in existing_inputs[layer_class]['dtype']:
  87. existing_inputs[layer_class]['dtype'].append(dtype)
  88. if shape not in existing_inputs[layer_class]['shape']:
  89. existing_inputs[layer_class]['shape'].append(shape)
  90. return existing_inputs, layer_dims
  91. def model_to_json(model_path, folder_path):
  92. cur_model = keras.models.load_model(model_path, custom_objects=custom_objects())
  93. cur_model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
  94. existing_edges = extract_edges(cur_model)
  95. existing_node = extract_nodes(cur_model)
  96. existing_inputs, layer_dims = extract_inputs(cur_model)
  97. layer_dict = {}
  98. cur_edge_num = 0
  99. for layer in cur_model.layers:
  100. layer_name = layer.__class__.__name__
  101. if 'Input' in layer_name:
  102. continue
  103. layer_dict[layer_name] = layer_dict[layer_name] + 1 if layer_name in layer_dict else 1
  104. inbound_nodes = layer._inbound_nodes
  105. if inbound_nodes:
  106. if isinstance(inbound_nodes[0].inbound_layers, list):
  107. cur_edge_num += len(inbound_nodes[0].inbound_layers)
  108. else:
  109. if inbound_nodes:
  110. cur_edge_num += 1
  111. layer_num = sum(layer_dict.values())
  112. layer_type = list(layer_dict.keys())
  113. print(f'layer_num:{layer_num}')
  114. print(f'layer_type_num:{layer_type}')
  115. print(f'cur_edge_num:{cur_edge_num}')
  116. print(existing_edges)
  117. print(existing_node)
  118. print(existing_inputs)
  119. print(layer_dims)
  120. cur_model_info = {}
  121. cur_model_info['edges'] = existing_edges
  122. cur_model_info['layer_config'] = existing_node
  123. cur_model_info['layer_input_info'] = existing_inputs
  124. cur_model_info['layer_num'] = layer_num
  125. cur_model_info['layer_type'] = layer_type
  126. cur_model_info['cur_edge_num'] = cur_edge_num
  127. cur_model_info['layer_dims'] = layer_dims
  128. if 'svhn' in model_path or 'fashion2' in model_path:
  129. json_path = model_path.replace("hdf5","json")+".json"
  130. else:
  131. json_path = os.path.join(folder_path, model_path.split("\\")[-1][:-3] + '.json')
  132. # if not os.path.exists(json_path):
  133. for op in cur_model_info['layer_config']:
  134. for config in cur_model_info['layer_config'][op]:
  135. for config_key, config_value in config.items():
  136. if not (isinstance(config_value, int) or isinstance(config_value, str) or isinstance(config_value,
  137. list) or isinstance(
  138. config_value, float) or isinstance(config_value, dict) or isinstance(config_value,
  139. set) or isinstance(
  140. config_value, tuple)):
  141. config[config_key] = str(config_value)
  142. try:
  143. with open(json_path, 'w') as json_file:
  144. json.dump(cur_model_info, json_file, indent=4)
  145. except Exception as e:
  146. print("!!!!!!!")
  147. print(cur_model_info)
  148. raise e
  149. def union_json(single_json_path, all_json_path):
  150. """
  151. single_json_path:单个json文件的路径
  152. all_json_path:分母json文件的路径
  153. """
  154. with open(single_json_path, 'r') as json_file:
  155. model_info = json.load(json_file)
  156. if not os.path.exists(all_json_path):
  157. all_layer_info = {}
  158. else:
  159. with open(all_json_path, 'r') as all_json_file:
  160. all_layer_info = json.load(all_json_file)
  161. if 'layer_config' not in all_layer_info.keys():
  162. all_layer_info['layer_config'] = {}
  163. for layer_class, layer_configs in model_info['layer_config'].items():
  164. if layer_class not in all_layer_info['layer_config'].keys():
  165. all_layer_info['layer_config'][layer_class] = layer_configs
  166. else:
  167. for layer_config in layer_configs:
  168. if layer_config not in all_layer_info['layer_config'][layer_class]:
  169. all_layer_info['layer_config'][layer_class].append(layer_config)
  170. if 'layer_input_info' not in all_layer_info.keys():
  171. all_layer_info['layer_input_info'] = {}
  172. for layer_class, layer_input_info in model_info['layer_input_info'].items():
  173. if layer_class not in all_layer_info['layer_input_info'].keys():
  174. all_layer_info['layer_input_info'][layer_class] = layer_input_info
  175. else:
  176. for attr in ["input_dims", "dtype", "shape"]:
  177. if attr not in all_layer_info['layer_input_info'][layer_class].keys():
  178. all_layer_info['layer_input_info'][layer_class][attr] = layer_input_info[attr]
  179. else:
  180. all_layer_info['layer_input_info'][layer_class][attr] = list(
  181. set(layer_input_info[attr]).union(set(all_layer_info['layer_input_info'][layer_class][attr])))
  182. if 'layer_dims' not in all_layer_info.keys():
  183. all_layer_info['layer_dims'] = {}
  184. for layer_class, layer_dims in model_info['layer_dims'].items():
  185. if layer_class not in all_layer_info['layer_dims'].keys():
  186. all_layer_info['layer_dims'][layer_class] = layer_dims
  187. else:
  188. for attr in ["input_dims", "output_dims"]:
  189. if attr not in all_layer_info['layer_dims'][layer_class].keys():
  190. all_layer_info['layer_dims'][layer_class][attr] = layer_dims[attr]
  191. else:
  192. all_layer_info['layer_dims'][layer_class][attr] = list(
  193. set(layer_dims[attr]).union(set(all_layer_info['layer_dims'][layer_class][attr])))
  194. if 'layer_type' not in all_layer_info.keys():
  195. all_layer_info['layer_type'] = model_info['layer_type']
  196. else:
  197. all_layer_info['layer_type'] = list(set(model_info['layer_type']).union(set(all_layer_info['layer_type'])))
  198. if 'max_edge_num' not in all_layer_info.keys():
  199. all_layer_info['max_edge_num'] = model_info['cur_edge_num']
  200. else:
  201. all_layer_info['max_edge_num'] = max(all_layer_info['max_edge_num'], model_info['cur_edge_num'])
  202. if 'max_layer_num' not in all_layer_info.keys():
  203. all_layer_info['max_layer_num'] = model_info['layer_num']
  204. else:
  205. all_layer_info['max_layer_num'] = max(all_layer_info['max_layer_num'], model_info['layer_num'])
  206. with open(all_json_path, 'w') as json_file:
  207. json.dump(all_layer_info, json_file, indent=4)
  208. class CoverageCalculatornew:
  209. # init里只能是和具体模型无关的数值的初始化
  210. def __init__(self, all_json_path, api_config_pool_path):
  211. self.all_layer_info = {}
  212. self.edges = []
  213. self.all_edges = []
  214. self.layer_config = {}
  215. self.layer_input_info = {}
  216. self.POSSIBLE_DTYPE = {'bfloat16', 'double', 'float16', 'float32', 'float64', 'half'}
  217. with open(api_config_pool_path, "r") as pool_file:
  218. self.api_config_pool = json.load(pool_file)
  219. with open(all_json_path, 'r') as json_file:
  220. self.all_layer_info = json.load(json_file)
  221. self.total_dtype_num = len(self.all_layer_info["layer_input_info"]) * len(self.POSSIBLE_DTYPE)
  222. self.total_shape_num = len(self.all_layer_info["layer_input_info"]) * PARAMETER_SPACE
  223. self.total_ndims_num = 0
  224. for layer_class in self.all_layer_info["layer_input_info"]:
  225. ndims_list = self.all_layer_info["layer_input_info"][layer_class]["input_dims"]
  226. self.total_ndims_num += len(ndims_list)
  227. self.total_input_num = self.total_ndims_num + self.total_dtype_num + self.total_shape_num
  228. self.total_param = {}
  229. # self.total_param_list = {}
  230. self.total_param_num = 0
  231. for layer_class in self.api_config_pool:
  232. self.total_param[layer_class] = 0
  233. # self.total_param_list[layer_class] = {}
  234. for config in self.api_config_pool[layer_class]:
  235. # self.total_param_list[layer_class][config] = []
  236. if self.api_config_pool[layer_class][config] == [0]:
  237. self.total_param[layer_class] += PARAMETER_SPACE
  238. else:
  239. self.total_param[layer_class] += len(self.api_config_pool[layer_class][config])
  240. self.total_param_num += self.total_param[layer_class]
  241. for pre_layer, next_layer in product(self.all_layer_info["layer_dims"].keys(), repeat=2):
  242. if set(self.all_layer_info["layer_dims"][pre_layer]["output_dims"]).intersection(
  243. set(self.all_layer_info["layer_dims"][next_layer]["input_dims"])) != 0:
  244. self.all_edges.append([pre_layer, next_layer])
  245. self.max_edge_num = self.all_layer_info['max_edge_num']
  246. self.max_layer_num = self.all_layer_info['max_layer_num']
  247. self.layer_type = len(self.all_layer_info["layer_type"])
  248. self.cur_edge_num = 0
  249. self.cur_layer_num = 0
  250. self.cur_layer_type = 0
  251. def load_json(self, json_path):
  252. with open(json_path, 'r') as json_file:
  253. model_info = json.load(json_file)
  254. self.cur_edge_num = model_info["cur_edge_num"]
  255. self.cur_layer_num = model_info['layer_num']
  256. self.cur_layer_type = len(model_info['layer_type'])
  257. self.edges = []
  258. self.layer_config = {}
  259. self.layer_input_info = {}
  260. for edge in model_info['edges']:
  261. if edge not in self.edges:
  262. self.edges.append(edge)
  263. for class_type, configs in model_info["layer_config"].items():
  264. if class_type not in self.layer_config:
  265. self.layer_config[class_type] = configs
  266. else:
  267. for config in configs:
  268. if config not in self.layer_config[class_type]:
  269. self.layer_config[class_type].append(config)
  270. print(self.layer_config)
  271. for layer_class, layer_input_info in model_info['layer_input_info'].items():
  272. if layer_class not in self.layer_input_info:
  273. self.layer_input_info[layer_class] = layer_input_info
  274. else:
  275. for attr in ["input_dims", "dtype", "shape"]:
  276. if attr not in self.layer_input_info[layer_class].keys():
  277. self.layer_input_info[layer_class][attr] = layer_input_info[attr]
  278. else:
  279. self.layer_input_info[layer_class][attr] = list(
  280. set(layer_input_info[attr]).union(
  281. set(self.layer_input_info[layer_class][attr])))
  282. def api_pair_coverage(self):
  283. # print(f"The API Pair Coverage Is: {len(self.edges)}/{len(self.all_edges)}")
  284. return len(self.edges) / len(self.all_edges)
  285. def _layer_config_coverage(self, layer_config_list, layer_class):
  286. """
  287. hp: count of param_value.
  288. param_list: {param1: [value1, value2], ...}
  289. """
  290. config_pool = self.api_config_pool[layer_class]
  291. param_list = {}
  292. for param in config_pool:
  293. param_list[param] = []
  294. hp = 0
  295. # Journal Submitted Version is Below.
  296. for layer_config in layer_config_list:
  297. for param in layer_config:
  298. if param not in param_list:
  299. continue
  300. if config_pool[param] == [0]:
  301. if layer_config[param] not in param_list[param] and len(param_list[param]) <= PARAMETER_SPACE:
  302. param_list[param].append(layer_config[param])
  303. hp += 1
  304. else:
  305. if layer_config[param] not in param_list[param]:
  306. param_list[param].append(layer_config[param])
  307. hp += 1
  308. return hp, param_list
  309. def config_coverage(self):
  310. total_hp = 0
  311. for layer_class in self.layer_config:
  312. if layer_class in self.api_config_pool:
  313. layer_config_list = self.layer_config[layer_class]
  314. hp, param_list = self._layer_config_coverage(layer_config_list, layer_class)
  315. total_hp += hp
  316. # print(f"The Configuration Coverage is: {total_hp}/{self.total_param_num}")
  317. return total_hp / self.total_param_num
  318. def ndims_coverage(self):
  319. """
  320. ndims_cov
  321. """
  322. covered_ndims_num = 0
  323. for layer_class in self.layer_input_info:
  324. ndims_list = self.layer_input_info[layer_class]["input_dims"]
  325. covered_ndims_num += len(ndims_list)
  326. return covered_ndims_num
  327. def dtype_coverage(self):
  328. covered_dtype_num = 0
  329. for layer_class in self.layer_input_info:
  330. dtype_list = self.layer_input_info[layer_class]["dtype"]
  331. covered_dtype_num += len(dtype_list)
  332. return covered_dtype_num
  333. def shape_coverage(self):
  334. covered_shape_num = 0
  335. for layer_class in self.layer_input_info:
  336. shape_list = self.layer_input_info[layer_class]["shape"]
  337. covered_shape_num += min(len(shape_list),
  338. PARAMETER_SPACE) # if the total number of shape is larger that SHAPE_SPACE, we set it as 100%
  339. return covered_shape_num
  340. def input_coverage(self):
  341. """
  342. input_cov = ndim_cov + dtype_cov + shape_cov
  343. """
  344. covered_ndims = self.ndims_coverage()
  345. covered_dtype = self.dtype_coverage()
  346. covered_shape = self.shape_coverage()
  347. print(f"The NDims Coverage Is: {covered_ndims}/{self.total_ndims_num}")
  348. print(f"The DType Coverage Is: {covered_dtype}/{self.total_dtype_num}")
  349. print(f"The Shape Coverage Is: {covered_shape}/{self.total_shape_num}")
  350. print(f"The Input Coverage Is: {covered_ndims + covered_dtype + covered_shape}/{self.total_input_num}")
  351. input_cov = (covered_ndims + covered_dtype + covered_shape) / self.total_input_num
  352. ndims_cov = covered_ndims / self.total_ndims_num
  353. dtype_cov = covered_dtype / self.total_dtype_num
  354. shape_cov = covered_shape / self.total_shape_num
  355. return input_cov, ndims_cov, dtype_cov, shape_cov
  356. def op_type_cover(self):
  357. print(f'op_type_cover is: {self.cur_layer_type}/{self.layer_type}')
  358. return self.cur_layer_type / self.layer_type
  359. def op_num_cover(self):
  360. print(f'op_num_cover is: {self.cur_layer_num}/{self.max_layer_num}')
  361. return self.cur_layer_num / self.max_layer_num
  362. def edge_cover(self):
  363. print(f'edge_cover is: {self.cur_edge_num}/{self.max_edge_num}')
  364. return self.cur_edge_num / self.max_edge_num
  365. def cal_coverage(self):
  366. input_cov, ndims_cov, dtype_cov, shape_cov = self.input_coverage()
  367. config_cov = self.config_coverage()
  368. api_cov = self.api_pair_coverage()
  369. op_type_cov = self.op_type_cover()
  370. op_num_cov = self.op_num_cover()
  371. edge_cov = self.edge_cover()
  372. return input_cov, config_cov, api_cov, op_type_cov, op_num_cov, edge_cov
  373. # if __name__ == '__main__':
  374. # model_path1 = 'data/mnist_output/000005/models/tensorflow.h5'
  375. # model_path2 = 'data/mnist_output/000004/models/tensorflow.h5'
  376. # all_json_path = os.path.join(folder_path, "all_layer_info.json")
  377. # model_to_json(model_path1)
  378. # model_to_json(model_path2)
  379. # for file in os.listdir(folder_path):
  380. # if file != 'all_layer_info.json':
  381. # file_path = os.path.join(folder_path, file)
  382. # union_json(file_path, all_json_path)
  383. #
  384. # cal_cov = CoverageCalculator(all_json_path)
  385. # cal_cov.load_json('model_json/000004.json')
  386. # cal_cov.cal_coverage()