DFG.py 52 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969970971972973974975976977978979980981982983984985986987988989990991992993994995996997998999100010011002100310041005100610071008100910101011101210131014101510161017101810191020102110221023102410251026102710281029103010311032103310341035103610371038103910401041104210431044104510461047104810491050105110521053105410551056105710581059106010611062106310641065106610671068106910701071107210731074107510761077107810791080108110821083108410851086108710881089109010911092109310941095109610971098109911001101110211031104110511061107110811091110111111121113111411151116111711181119112011211122112311241125112611271128112911301131113211331134113511361137113811391140114111421143114411451146114711481149115011511152115311541155115611571158115911601161116211631164116511661167116811691170117111721173117411751176117711781179
  1. # Copyright (c) Microsoft Corporation.
  2. # Licensed under the MIT license.
  3. from tree_sitter import Language, Parser
  4. from .utils import (remove_comments_and_docstrings,
  5. tree_to_token_index,
  6. index_to_code_token,
  7. tree_to_variable_index)
  8. def DFG_python(root_node, index_to_code, states):
  9. assignment = ['assignment', 'augmented_assignment', 'for_in_clause']
  10. if_statement = ['if_statement']
  11. for_statement = ['for_statement']
  12. while_statement = ['while_statement']
  13. do_first_statement = ['for_in_clause']
  14. def_statement = ['default_parameter']
  15. states = states.copy()
  16. if (len(root_node.children) == 0 or root_node.type == 'string') and root_node.type != 'comment':
  17. idx, code = index_to_code[(root_node.start_point, root_node.end_point)]
  18. if root_node.type == code:
  19. return [], states
  20. elif code in states:
  21. return [(code, idx, 'comesFrom', [code], states[code].copy())], states
  22. else:
  23. if root_node.type == 'identifier':
  24. states[code] = [idx]
  25. return [(code, idx, 'comesFrom', [], [])], states
  26. elif root_node.type in def_statement:
  27. name = root_node.child_by_field_name('name')
  28. value = root_node.child_by_field_name('value')
  29. DFG = []
  30. if value is None:
  31. indexs = tree_to_variable_index(name, index_to_code)
  32. for index in indexs:
  33. idx, code = index_to_code[index]
  34. DFG.append((code, idx, 'comesFrom', [], []))
  35. states[code] = [idx]
  36. return sorted(DFG, key=lambda x: x[1]), states
  37. else:
  38. name_indexs = tree_to_variable_index(name, index_to_code)
  39. value_indexs = tree_to_variable_index(value, index_to_code)
  40. temp, states = DFG_python(value, index_to_code, states)
  41. DFG += temp
  42. for index1 in name_indexs:
  43. idx1, code1 = index_to_code[index1]
  44. for index2 in value_indexs:
  45. idx2, code2 = index_to_code[index2]
  46. DFG.append((code1, idx1, 'comesFrom', [code2], [idx2]))
  47. states[code1] = [idx1]
  48. return sorted(DFG, key=lambda x: x[1]), states
  49. elif root_node.type in assignment:
  50. if root_node.type == 'for_in_clause':
  51. right_nodes = [root_node.children[-1]]
  52. left_nodes = [root_node.child_by_field_name('left')]
  53. else:
  54. if root_node.child_by_field_name('right') is None:
  55. return [], states
  56. left_nodes = [x for x in root_node.child_by_field_name('left').children if x.type != ',']
  57. right_nodes = [x for x in root_node.child_by_field_name('right').children if x.type != ',']
  58. if len(right_nodes) != len(left_nodes):
  59. left_nodes = [root_node.child_by_field_name('left')]
  60. right_nodes = [root_node.child_by_field_name('right')]
  61. if len(left_nodes) == 0:
  62. left_nodes = [root_node.child_by_field_name('left')]
  63. if len(right_nodes) == 0:
  64. right_nodes = [root_node.child_by_field_name('right')]
  65. DFG = []
  66. for node in right_nodes:
  67. temp, states = DFG_python(node, index_to_code, states)
  68. DFG += temp
  69. for left_node, right_node in zip(left_nodes, right_nodes):
  70. left_tokens_index = tree_to_variable_index(left_node, index_to_code)
  71. right_tokens_index = tree_to_variable_index(right_node, index_to_code)
  72. temp = []
  73. for token1_index in left_tokens_index:
  74. idx1, code1 = index_to_code[token1_index]
  75. temp.append((code1, idx1, 'computedFrom', [index_to_code[x][1] for x in right_tokens_index],
  76. [index_to_code[x][0] for x in right_tokens_index]))
  77. states[code1] = [idx1]
  78. DFG += temp
  79. return sorted(DFG, key=lambda x: x[1]), states
  80. elif root_node.type in if_statement:
  81. DFG = []
  82. current_states = states.copy()
  83. others_states = []
  84. tag = False
  85. if 'else' in root_node.type:
  86. tag = True
  87. for child in root_node.children:
  88. if 'else' in child.type:
  89. tag = True
  90. if child.type not in ['elif_clause', 'else_clause']:
  91. temp, current_states = DFG_python(child, index_to_code, current_states)
  92. DFG += temp
  93. else:
  94. temp, new_states = DFG_python(child, index_to_code, states)
  95. DFG += temp
  96. others_states.append(new_states)
  97. others_states.append(current_states)
  98. if tag is False:
  99. others_states.append(states)
  100. new_states = {}
  101. for dic in others_states:
  102. for key in dic:
  103. if key not in new_states:
  104. new_states[key] = dic[key].copy()
  105. else:
  106. new_states[key] += dic[key]
  107. for key in new_states:
  108. new_states[key] = sorted(list(set(new_states[key])))
  109. return sorted(DFG, key=lambda x: x[1]), new_states
  110. elif root_node.type in for_statement:
  111. DFG = []
  112. for i in range(2):
  113. right_nodes = [x for x in root_node.child_by_field_name('right').children if x.type != ',']
  114. left_nodes = [x for x in root_node.child_by_field_name('left').children if x.type != ',']
  115. if len(right_nodes) != len(left_nodes):
  116. left_nodes = [root_node.child_by_field_name('left')]
  117. right_nodes = [root_node.child_by_field_name('right')]
  118. if len(left_nodes) == 0:
  119. left_nodes = [root_node.child_by_field_name('left')]
  120. if len(right_nodes) == 0:
  121. right_nodes = [root_node.child_by_field_name('right')]
  122. for node in right_nodes:
  123. temp, states = DFG_python(node, index_to_code, states)
  124. DFG += temp
  125. for left_node, right_node in zip(left_nodes, right_nodes):
  126. left_tokens_index = tree_to_variable_index(left_node, index_to_code)
  127. right_tokens_index = tree_to_variable_index(right_node, index_to_code)
  128. temp = []
  129. for token1_index in left_tokens_index:
  130. idx1, code1 = index_to_code[token1_index]
  131. temp.append((code1, idx1, 'computedFrom', [index_to_code[x][1] for x in right_tokens_index],
  132. [index_to_code[x][0] for x in right_tokens_index]))
  133. states[code1] = [idx1]
  134. DFG += temp
  135. if root_node.children[-1].type == "block":
  136. temp, states = DFG_python(root_node.children[-1], index_to_code, states)
  137. DFG += temp
  138. dic = {}
  139. for x in DFG:
  140. if (x[0], x[1], x[2]) not in dic:
  141. dic[(x[0], x[1], x[2])] = [x[3], x[4]]
  142. else:
  143. dic[(x[0], x[1], x[2])][0] = list(set(dic[(x[0], x[1], x[2])][0] + x[3]))
  144. dic[(x[0], x[1], x[2])][1] = sorted(list(set(dic[(x[0], x[1], x[2])][1] + x[4])))
  145. DFG = [(x[0], x[1], x[2], y[0], y[1]) for x, y in sorted(dic.items(), key=lambda t: t[0][1])]
  146. return sorted(DFG, key=lambda x: x[1]), states
  147. elif root_node.type in while_statement:
  148. DFG = []
  149. for i in range(2):
  150. for child in root_node.children:
  151. temp, states = DFG_python(child, index_to_code, states)
  152. DFG += temp
  153. dic = {}
  154. for x in DFG:
  155. if (x[0], x[1], x[2]) not in dic:
  156. dic[(x[0], x[1], x[2])] = [x[3], x[4]]
  157. else:
  158. dic[(x[0], x[1], x[2])][0] = list(set(dic[(x[0], x[1], x[2])][0] + x[3]))
  159. dic[(x[0], x[1], x[2])][1] = sorted(list(set(dic[(x[0], x[1], x[2])][1] + x[4])))
  160. DFG = [(x[0], x[1], x[2], y[0], y[1]) for x, y in sorted(dic.items(), key=lambda t: t[0][1])]
  161. return sorted(DFG, key=lambda x: x[1]), states
  162. else:
  163. DFG = []
  164. for child in root_node.children:
  165. if child.type in do_first_statement:
  166. temp, states = DFG_python(child, index_to_code, states)
  167. DFG += temp
  168. for child in root_node.children:
  169. if child.type not in do_first_statement:
  170. temp, states = DFG_python(child, index_to_code, states)
  171. DFG += temp
  172. return sorted(DFG, key=lambda x: x[1]), states
  173. def DFG_java(root_node, index_to_code, states):
  174. assignment = ['assignment_expression']
  175. def_statement = ['variable_declarator']
  176. increment_statement = ['update_expression']
  177. if_statement = ['if_statement', 'else']
  178. for_statement = ['for_statement']
  179. enhanced_for_statement = ['enhanced_for_statement']
  180. while_statement = ['while_statement']
  181. do_first_statement = []
  182. states = states.copy()
  183. if (len(root_node.children) == 0 or root_node.type == 'string') and root_node.type != 'comment':
  184. idx, code = index_to_code[(root_node.start_point, root_node.end_point)]
  185. if root_node.type == code:
  186. return [], states
  187. elif code in states:
  188. return [(code, idx, 'comesFrom', [code], states[code].copy())], states
  189. else:
  190. if root_node.type == 'identifier':
  191. states[code] = [idx]
  192. return [(code, idx, 'comesFrom', [], [])], states
  193. elif root_node.type in def_statement:
  194. name = root_node.child_by_field_name('name')
  195. value = root_node.child_by_field_name('value')
  196. DFG = []
  197. if value is None:
  198. indexs = tree_to_variable_index(name, index_to_code)
  199. for index in indexs:
  200. idx, code = index_to_code[index]
  201. DFG.append((code, idx, 'comesFrom', [], []))
  202. states[code] = [idx]
  203. return sorted(DFG, key=lambda x: x[1]), states
  204. else:
  205. name_indexs = tree_to_variable_index(name, index_to_code)
  206. value_indexs = tree_to_variable_index(value, index_to_code)
  207. temp, states = DFG_java(value, index_to_code, states)
  208. DFG += temp
  209. for index1 in name_indexs:
  210. idx1, code1 = index_to_code[index1]
  211. for index2 in value_indexs:
  212. idx2, code2 = index_to_code[index2]
  213. DFG.append((code1, idx1, 'comesFrom', [code2], [idx2]))
  214. states[code1] = [idx1]
  215. return sorted(DFG, key=lambda x: x[1]), states
  216. elif root_node.type in assignment:
  217. left_nodes = root_node.child_by_field_name('left')
  218. right_nodes = root_node.child_by_field_name('right')
  219. DFG = []
  220. temp, states = DFG_java(right_nodes, index_to_code, states)
  221. DFG += temp
  222. name_indexs = tree_to_variable_index(left_nodes, index_to_code)
  223. value_indexs = tree_to_variable_index(right_nodes, index_to_code)
  224. for index1 in name_indexs:
  225. idx1, code1 = index_to_code[index1]
  226. for index2 in value_indexs:
  227. idx2, code2 = index_to_code[index2]
  228. DFG.append((code1, idx1, 'computedFrom', [code2], [idx2]))
  229. states[code1] = [idx1]
  230. return sorted(DFG, key=lambda x: x[1]), states
  231. elif root_node.type in increment_statement:
  232. DFG = []
  233. indexs = tree_to_variable_index(root_node, index_to_code)
  234. for index1 in indexs:
  235. idx1, code1 = index_to_code[index1]
  236. for index2 in indexs:
  237. idx2, code2 = index_to_code[index2]
  238. DFG.append((code1, idx1, 'computedFrom', [code2], [idx2]))
  239. states[code1] = [idx1]
  240. return sorted(DFG, key=lambda x: x[1]), states
  241. elif root_node.type in if_statement:
  242. DFG = []
  243. current_states = states.copy()
  244. others_states = []
  245. flag = False
  246. tag = False
  247. if 'else' in root_node.type:
  248. tag = True
  249. for child in root_node.children:
  250. if 'else' in child.type:
  251. tag = True
  252. if child.type not in if_statement and flag is False:
  253. temp, current_states = DFG_java(child, index_to_code, current_states)
  254. DFG += temp
  255. else:
  256. flag = True
  257. temp, new_states = DFG_java(child, index_to_code, states)
  258. DFG += temp
  259. others_states.append(new_states)
  260. others_states.append(current_states)
  261. if tag is False:
  262. others_states.append(states)
  263. new_states = {}
  264. for dic in others_states:
  265. for key in dic:
  266. if key not in new_states:
  267. new_states[key] = dic[key].copy()
  268. else:
  269. new_states[key] += dic[key]
  270. for key in new_states:
  271. new_states[key] = sorted(list(set(new_states[key])))
  272. return sorted(DFG, key=lambda x: x[1]), new_states
  273. elif root_node.type in for_statement:
  274. DFG = []
  275. for child in root_node.children:
  276. temp, states = DFG_java(child, index_to_code, states)
  277. DFG += temp
  278. flag = False
  279. for child in root_node.children:
  280. if flag:
  281. temp, states = DFG_java(child, index_to_code, states)
  282. DFG += temp
  283. elif child.type == "local_variable_declaration":
  284. flag = True
  285. dic = {}
  286. for x in DFG:
  287. if (x[0], x[1], x[2]) not in dic:
  288. dic[(x[0], x[1], x[2])] = [x[3], x[4]]
  289. else:
  290. dic[(x[0], x[1], x[2])][0] = list(set(dic[(x[0], x[1], x[2])][0] + x[3]))
  291. dic[(x[0], x[1], x[2])][1] = sorted(list(set(dic[(x[0], x[1], x[2])][1] + x[4])))
  292. DFG = [(x[0], x[1], x[2], y[0], y[1]) for x, y in sorted(dic.items(), key=lambda t: t[0][1])]
  293. return sorted(DFG, key=lambda x: x[1]), states
  294. elif root_node.type in enhanced_for_statement:
  295. name = root_node.child_by_field_name('name')
  296. value = root_node.child_by_field_name('value')
  297. body = root_node.child_by_field_name('body')
  298. DFG = []
  299. for i in range(2):
  300. temp, states = DFG_java(value, index_to_code, states)
  301. DFG += temp
  302. name_indexs = tree_to_variable_index(name, index_to_code)
  303. value_indexs = tree_to_variable_index(value, index_to_code)
  304. for index1 in name_indexs:
  305. idx1, code1 = index_to_code[index1]
  306. for index2 in value_indexs:
  307. idx2, code2 = index_to_code[index2]
  308. DFG.append((code1, idx1, 'computedFrom', [code2], [idx2]))
  309. states[code1] = [idx1]
  310. temp, states = DFG_java(body, index_to_code, states)
  311. DFG += temp
  312. dic = {}
  313. for x in DFG:
  314. if (x[0], x[1], x[2]) not in dic:
  315. dic[(x[0], x[1], x[2])] = [x[3], x[4]]
  316. else:
  317. dic[(x[0], x[1], x[2])][0] = list(set(dic[(x[0], x[1], x[2])][0] + x[3]))
  318. dic[(x[0], x[1], x[2])][1] = sorted(list(set(dic[(x[0], x[1], x[2])][1] + x[4])))
  319. DFG = [(x[0], x[1], x[2], y[0], y[1]) for x, y in sorted(dic.items(), key=lambda t: t[0][1])]
  320. return sorted(DFG, key=lambda x: x[1]), states
  321. elif root_node.type in while_statement:
  322. DFG = []
  323. for i in range(2):
  324. for child in root_node.children:
  325. temp, states = DFG_java(child, index_to_code, states)
  326. DFG += temp
  327. dic = {}
  328. for x in DFG:
  329. if (x[0], x[1], x[2]) not in dic:
  330. dic[(x[0], x[1], x[2])] = [x[3], x[4]]
  331. else:
  332. dic[(x[0], x[1], x[2])][0] = list(set(dic[(x[0], x[1], x[2])][0] + x[3]))
  333. dic[(x[0], x[1], x[2])][1] = sorted(list(set(dic[(x[0], x[1], x[2])][1] + x[4])))
  334. DFG = [(x[0], x[1], x[2], y[0], y[1]) for x, y in sorted(dic.items(), key=lambda t: t[0][1])]
  335. return sorted(DFG, key=lambda x: x[1]), states
  336. else:
  337. DFG = []
  338. for child in root_node.children:
  339. if child.type in do_first_statement:
  340. temp, states = DFG_java(child, index_to_code, states)
  341. DFG += temp
  342. for child in root_node.children:
  343. if child.type not in do_first_statement:
  344. temp, states = DFG_java(child, index_to_code, states)
  345. DFG += temp
  346. return sorted(DFG, key=lambda x: x[1]), states
  347. def DFG_csharp(root_node, index_to_code, states):
  348. assignment = ['assignment_expression']
  349. def_statement = ['variable_declarator']
  350. increment_statement = ['postfix_unary_expression']
  351. if_statement = ['if_statement', 'else']
  352. for_statement = ['for_statement']
  353. enhanced_for_statement = ['for_each_statement']
  354. while_statement = ['while_statement']
  355. do_first_statement = []
  356. states = states.copy()
  357. if (len(root_node.children) == 0 or root_node.type == 'string') and root_node.type != 'comment':
  358. idx, code = index_to_code[(root_node.start_point, root_node.end_point)]
  359. if root_node.type == code:
  360. return [], states
  361. elif code in states:
  362. return [(code, idx, 'comesFrom', [code], states[code].copy())], states
  363. else:
  364. if root_node.type == 'identifier':
  365. states[code] = [idx]
  366. return [(code, idx, 'comesFrom', [], [])], states
  367. elif root_node.type in def_statement:
  368. if len(root_node.children) == 2:
  369. name = root_node.children[0]
  370. value = root_node.children[1]
  371. else:
  372. name = root_node.children[0]
  373. value = None
  374. DFG = []
  375. if value is None:
  376. indexs = tree_to_variable_index(name, index_to_code)
  377. for index in indexs:
  378. idx, code = index_to_code[index]
  379. DFG.append((code, idx, 'comesFrom', [], []))
  380. states[code] = [idx]
  381. return sorted(DFG, key=lambda x: x[1]), states
  382. else:
  383. name_indexs = tree_to_variable_index(name, index_to_code)
  384. value_indexs = tree_to_variable_index(value, index_to_code)
  385. temp, states = DFG_csharp(value, index_to_code, states)
  386. DFG += temp
  387. for index1 in name_indexs:
  388. idx1, code1 = index_to_code[index1]
  389. for index2 in value_indexs:
  390. idx2, code2 = index_to_code[index2]
  391. DFG.append((code1, idx1, 'comesFrom', [code2], [idx2]))
  392. states[code1] = [idx1]
  393. return sorted(DFG, key=lambda x: x[1]), states
  394. elif root_node.type in assignment:
  395. left_nodes = root_node.child_by_field_name('left')
  396. right_nodes = root_node.child_by_field_name('right')
  397. DFG = []
  398. temp, states = DFG_csharp(right_nodes, index_to_code, states)
  399. DFG += temp
  400. name_indexs = tree_to_variable_index(left_nodes, index_to_code)
  401. value_indexs = tree_to_variable_index(right_nodes, index_to_code)
  402. for index1 in name_indexs:
  403. idx1, code1 = index_to_code[index1]
  404. for index2 in value_indexs:
  405. idx2, code2 = index_to_code[index2]
  406. DFG.append((code1, idx1, 'computedFrom', [code2], [idx2]))
  407. states[code1] = [idx1]
  408. return sorted(DFG, key=lambda x: x[1]), states
  409. elif root_node.type in increment_statement:
  410. DFG = []
  411. indexs = tree_to_variable_index(root_node, index_to_code)
  412. for index1 in indexs:
  413. idx1, code1 = index_to_code[index1]
  414. for index2 in indexs:
  415. idx2, code2 = index_to_code[index2]
  416. DFG.append((code1, idx1, 'computedFrom', [code2], [idx2]))
  417. states[code1] = [idx1]
  418. return sorted(DFG, key=lambda x: x[1]), states
  419. elif root_node.type in if_statement:
  420. DFG = []
  421. current_states = states.copy()
  422. others_states = []
  423. flag = False
  424. tag = False
  425. if 'else' in root_node.type:
  426. tag = True
  427. for child in root_node.children:
  428. if 'else' in child.type:
  429. tag = True
  430. if child.type not in if_statement and flag is False:
  431. temp, current_states = DFG_csharp(child, index_to_code, current_states)
  432. DFG += temp
  433. else:
  434. flag = True
  435. temp, new_states = DFG_csharp(child, index_to_code, states)
  436. DFG += temp
  437. others_states.append(new_states)
  438. others_states.append(current_states)
  439. if tag is False:
  440. others_states.append(states)
  441. new_states = {}
  442. for dic in others_states:
  443. for key in dic:
  444. if key not in new_states:
  445. new_states[key] = dic[key].copy()
  446. else:
  447. new_states[key] += dic[key]
  448. for key in new_states:
  449. new_states[key] = sorted(list(set(new_states[key])))
  450. return sorted(DFG, key=lambda x: x[1]), new_states
  451. elif root_node.type in for_statement:
  452. DFG = []
  453. for child in root_node.children:
  454. temp, states = DFG_csharp(child, index_to_code, states)
  455. DFG += temp
  456. flag = False
  457. for child in root_node.children:
  458. if flag:
  459. temp, states = DFG_csharp(child, index_to_code, states)
  460. DFG += temp
  461. elif child.type == "local_variable_declaration":
  462. flag = True
  463. dic = {}
  464. for x in DFG:
  465. if (x[0], x[1], x[2]) not in dic:
  466. dic[(x[0], x[1], x[2])] = [x[3], x[4]]
  467. else:
  468. dic[(x[0], x[1], x[2])][0] = list(set(dic[(x[0], x[1], x[2])][0] + x[3]))
  469. dic[(x[0], x[1], x[2])][1] = sorted(list(set(dic[(x[0], x[1], x[2])][1] + x[4])))
  470. DFG = [(x[0], x[1], x[2], y[0], y[1]) for x, y in sorted(dic.items(), key=lambda t: t[0][1])]
  471. return sorted(DFG, key=lambda x: x[1]), states
  472. elif root_node.type in enhanced_for_statement:
  473. name = root_node.child_by_field_name('left')
  474. value = root_node.child_by_field_name('right')
  475. body = root_node.child_by_field_name('body')
  476. DFG = []
  477. for i in range(2):
  478. temp, states = DFG_csharp(value, index_to_code, states)
  479. DFG += temp
  480. name_indexs = tree_to_variable_index(name, index_to_code)
  481. value_indexs = tree_to_variable_index(value, index_to_code)
  482. for index1 in name_indexs:
  483. idx1, code1 = index_to_code[index1]
  484. for index2 in value_indexs:
  485. idx2, code2 = index_to_code[index2]
  486. DFG.append((code1, idx1, 'computedFrom', [code2], [idx2]))
  487. states[code1] = [idx1]
  488. temp, states = DFG_csharp(body, index_to_code, states)
  489. DFG += temp
  490. dic = {}
  491. for x in DFG:
  492. if (x[0], x[1], x[2]) not in dic:
  493. dic[(x[0], x[1], x[2])] = [x[3], x[4]]
  494. else:
  495. dic[(x[0], x[1], x[2])][0] = list(set(dic[(x[0], x[1], x[2])][0] + x[3]))
  496. dic[(x[0], x[1], x[2])][1] = sorted(list(set(dic[(x[0], x[1], x[2])][1] + x[4])))
  497. DFG = [(x[0], x[1], x[2], y[0], y[1]) for x, y in sorted(dic.items(), key=lambda t: t[0][1])]
  498. return sorted(DFG, key=lambda x: x[1]), states
  499. elif root_node.type in while_statement:
  500. DFG = []
  501. for i in range(2):
  502. for child in root_node.children:
  503. temp, states = DFG_csharp(child, index_to_code, states)
  504. DFG += temp
  505. dic = {}
  506. for x in DFG:
  507. if (x[0], x[1], x[2]) not in dic:
  508. dic[(x[0], x[1], x[2])] = [x[3], x[4]]
  509. else:
  510. dic[(x[0], x[1], x[2])][0] = list(set(dic[(x[0], x[1], x[2])][0] + x[3]))
  511. dic[(x[0], x[1], x[2])][1] = sorted(list(set(dic[(x[0], x[1], x[2])][1] + x[4])))
  512. DFG = [(x[0], x[1], x[2], y[0], y[1]) for x, y in sorted(dic.items(), key=lambda t: t[0][1])]
  513. return sorted(DFG, key=lambda x: x[1]), states
  514. else:
  515. DFG = []
  516. for child in root_node.children:
  517. if child.type in do_first_statement:
  518. temp, states = DFG_csharp(child, index_to_code, states)
  519. DFG += temp
  520. for child in root_node.children:
  521. if child.type not in do_first_statement:
  522. temp, states = DFG_csharp(child, index_to_code, states)
  523. DFG += temp
  524. return sorted(DFG, key=lambda x: x[1]), states
  525. def DFG_ruby(root_node, index_to_code, states):
  526. assignment = ['assignment', 'operator_assignment']
  527. if_statement = ['if', 'elsif', 'else', 'unless', 'when']
  528. for_statement = ['for']
  529. while_statement = ['while_modifier', 'until']
  530. do_first_statement = []
  531. def_statement = ['keyword_parameter']
  532. if (len(root_node.children) == 0 or root_node.type == 'string') and root_node.type != 'comment':
  533. states = states.copy()
  534. idx, code = index_to_code[(root_node.start_point, root_node.end_point)]
  535. if root_node.type == code:
  536. return [], states
  537. elif code in states:
  538. return [(code, idx, 'comesFrom', [code], states[code].copy())], states
  539. else:
  540. if root_node.type == 'identifier':
  541. states[code] = [idx]
  542. return [(code, idx, 'comesFrom', [], [])], states
  543. elif root_node.type in def_statement:
  544. name = root_node.child_by_field_name('name')
  545. value = root_node.child_by_field_name('value')
  546. DFG = []
  547. if value is None:
  548. indexs = tree_to_variable_index(name, index_to_code)
  549. for index in indexs:
  550. idx, code = index_to_code[index]
  551. DFG.append((code, idx, 'comesFrom', [], []))
  552. states[code] = [idx]
  553. return sorted(DFG, key=lambda x: x[1]), states
  554. else:
  555. name_indexs = tree_to_variable_index(name, index_to_code)
  556. value_indexs = tree_to_variable_index(value, index_to_code)
  557. temp, states = DFG_ruby(value, index_to_code, states)
  558. DFG += temp
  559. for index1 in name_indexs:
  560. idx1, code1 = index_to_code[index1]
  561. for index2 in value_indexs:
  562. idx2, code2 = index_to_code[index2]
  563. DFG.append((code1, idx1, 'comesFrom', [code2], [idx2]))
  564. states[code1] = [idx1]
  565. return sorted(DFG, key=lambda x: x[1]), states
  566. elif root_node.type in assignment:
  567. left_nodes = [x for x in root_node.child_by_field_name('left').children if x.type != ',']
  568. right_nodes = [x for x in root_node.child_by_field_name('right').children if x.type != ',']
  569. if len(right_nodes) != len(left_nodes):
  570. left_nodes = [root_node.child_by_field_name('left')]
  571. right_nodes = [root_node.child_by_field_name('right')]
  572. if len(left_nodes) == 0:
  573. left_nodes = [root_node.child_by_field_name('left')]
  574. if len(right_nodes) == 0:
  575. right_nodes = [root_node.child_by_field_name('right')]
  576. if root_node.type == "operator_assignment":
  577. left_nodes = [root_node.children[0]]
  578. right_nodes = [root_node.children[-1]]
  579. DFG = []
  580. for node in right_nodes:
  581. temp, states = DFG_ruby(node, index_to_code, states)
  582. DFG += temp
  583. for left_node, right_node in zip(left_nodes, right_nodes):
  584. left_tokens_index = tree_to_variable_index(left_node, index_to_code)
  585. right_tokens_index = tree_to_variable_index(right_node, index_to_code)
  586. temp = []
  587. for token1_index in left_tokens_index:
  588. idx1, code1 = index_to_code[token1_index]
  589. temp.append((code1, idx1, 'computedFrom', [index_to_code[x][1] for x in right_tokens_index],
  590. [index_to_code[x][0] for x in right_tokens_index]))
  591. states[code1] = [idx1]
  592. DFG += temp
  593. return sorted(DFG, key=lambda x: x[1]), states
  594. elif root_node.type in if_statement:
  595. DFG = []
  596. current_states = states.copy()
  597. others_states = []
  598. tag = False
  599. if 'else' in root_node.type:
  600. tag = True
  601. for child in root_node.children:
  602. if 'else' in child.type:
  603. tag = True
  604. if child.type not in if_statement:
  605. temp, current_states = DFG_ruby(child, index_to_code, current_states)
  606. DFG += temp
  607. else:
  608. temp, new_states = DFG_ruby(child, index_to_code, states)
  609. DFG += temp
  610. others_states.append(new_states)
  611. others_states.append(current_states)
  612. if tag is False:
  613. others_states.append(states)
  614. new_states = {}
  615. for dic in others_states:
  616. for key in dic:
  617. if key not in new_states:
  618. new_states[key] = dic[key].copy()
  619. else:
  620. new_states[key] += dic[key]
  621. for key in new_states:
  622. new_states[key] = sorted(list(set(new_states[key])))
  623. return sorted(DFG, key=lambda x: x[1]), new_states
  624. elif root_node.type in for_statement:
  625. DFG = []
  626. for i in range(2):
  627. left_nodes = [root_node.child_by_field_name('pattern')]
  628. right_nodes = [root_node.child_by_field_name('value')]
  629. assert len(right_nodes) == len(left_nodes)
  630. for node in right_nodes:
  631. temp, states = DFG_ruby(node, index_to_code, states)
  632. DFG += temp
  633. for left_node, right_node in zip(left_nodes, right_nodes):
  634. left_tokens_index = tree_to_variable_index(left_node, index_to_code)
  635. right_tokens_index = tree_to_variable_index(right_node, index_to_code)
  636. temp = []
  637. for token1_index in left_tokens_index:
  638. idx1, code1 = index_to_code[token1_index]
  639. temp.append((code1, idx1, 'computedFrom', [index_to_code[x][1] for x in right_tokens_index],
  640. [index_to_code[x][0] for x in right_tokens_index]))
  641. states[code1] = [idx1]
  642. DFG += temp
  643. temp, states = DFG_ruby(root_node.child_by_field_name('body'), index_to_code, states)
  644. DFG += temp
  645. dic = {}
  646. for x in DFG:
  647. if (x[0], x[1], x[2]) not in dic:
  648. dic[(x[0], x[1], x[2])] = [x[3], x[4]]
  649. else:
  650. dic[(x[0], x[1], x[2])][0] = list(set(dic[(x[0], x[1], x[2])][0] + x[3]))
  651. dic[(x[0], x[1], x[2])][1] = sorted(list(set(dic[(x[0], x[1], x[2])][1] + x[4])))
  652. DFG = [(x[0], x[1], x[2], y[0], y[1]) for x, y in sorted(dic.items(), key=lambda t: t[0][1])]
  653. return sorted(DFG, key=lambda x: x[1]), states
  654. elif root_node.type in while_statement:
  655. DFG = []
  656. for i in range(2):
  657. for child in root_node.children:
  658. temp, states = DFG_ruby(child, index_to_code, states)
  659. DFG += temp
  660. dic = {}
  661. for x in DFG:
  662. if (x[0], x[1], x[2]) not in dic:
  663. dic[(x[0], x[1], x[2])] = [x[3], x[4]]
  664. else:
  665. dic[(x[0], x[1], x[2])][0] = list(set(dic[(x[0], x[1], x[2])][0] + x[3]))
  666. dic[(x[0], x[1], x[2])][1] = sorted(list(set(dic[(x[0], x[1], x[2])][1] + x[4])))
  667. DFG = [(x[0], x[1], x[2], y[0], y[1]) for x, y in sorted(dic.items(), key=lambda t: t[0][1])]
  668. return sorted(DFG, key=lambda x: x[1]), states
  669. else:
  670. DFG = []
  671. for child in root_node.children:
  672. if child.type in do_first_statement:
  673. temp, states = DFG_ruby(child, index_to_code, states)
  674. DFG += temp
  675. for child in root_node.children:
  676. if child.type not in do_first_statement:
  677. temp, states = DFG_ruby(child, index_to_code, states)
  678. DFG += temp
  679. return sorted(DFG, key=lambda x: x[1]), states
  680. def DFG_go(root_node, index_to_code, states):
  681. assignment = ['assignment_statement', ]
  682. def_statement = ['var_spec']
  683. increment_statement = ['inc_statement']
  684. if_statement = ['if_statement', 'else']
  685. for_statement = ['for_statement']
  686. enhanced_for_statement = []
  687. while_statement = []
  688. do_first_statement = []
  689. states = states.copy()
  690. if (len(root_node.children) == 0 or root_node.type == 'string') and root_node.type != 'comment':
  691. idx, code = index_to_code[(root_node.start_point, root_node.end_point)]
  692. if root_node.type == code:
  693. return [], states
  694. elif code in states:
  695. return [(code, idx, 'comesFrom', [code], states[code].copy())], states
  696. else:
  697. if root_node.type == 'identifier':
  698. states[code] = [idx]
  699. return [(code, idx, 'comesFrom', [], [])], states
  700. elif root_node.type in def_statement:
  701. name = root_node.child_by_field_name('name')
  702. value = root_node.child_by_field_name('value')
  703. DFG = []
  704. if value is None:
  705. indexs = tree_to_variable_index(name, index_to_code)
  706. for index in indexs:
  707. idx, code = index_to_code[index]
  708. DFG.append((code, idx, 'comesFrom', [], []))
  709. states[code] = [idx]
  710. return sorted(DFG, key=lambda x: x[1]), states
  711. else:
  712. name_indexs = tree_to_variable_index(name, index_to_code)
  713. value_indexs = tree_to_variable_index(value, index_to_code)
  714. temp, states = DFG_go(value, index_to_code, states)
  715. DFG += temp
  716. for index1 in name_indexs:
  717. idx1, code1 = index_to_code[index1]
  718. for index2 in value_indexs:
  719. idx2, code2 = index_to_code[index2]
  720. DFG.append((code1, idx1, 'comesFrom', [code2], [idx2]))
  721. states[code1] = [idx1]
  722. return sorted(DFG, key=lambda x: x[1]), states
  723. elif root_node.type in assignment:
  724. left_nodes = root_node.child_by_field_name('left')
  725. right_nodes = root_node.child_by_field_name('right')
  726. DFG = []
  727. temp, states = DFG_go(right_nodes, index_to_code, states)
  728. DFG += temp
  729. name_indexs = tree_to_variable_index(left_nodes, index_to_code)
  730. value_indexs = tree_to_variable_index(right_nodes, index_to_code)
  731. for index1 in name_indexs:
  732. idx1, code1 = index_to_code[index1]
  733. for index2 in value_indexs:
  734. idx2, code2 = index_to_code[index2]
  735. DFG.append((code1, idx1, 'computedFrom', [code2], [idx2]))
  736. states[code1] = [idx1]
  737. return sorted(DFG, key=lambda x: x[1]), states
  738. elif root_node.type in increment_statement:
  739. DFG = []
  740. indexs = tree_to_variable_index(root_node, index_to_code)
  741. for index1 in indexs:
  742. idx1, code1 = index_to_code[index1]
  743. for index2 in indexs:
  744. idx2, code2 = index_to_code[index2]
  745. DFG.append((code1, idx1, 'computedFrom', [code2], [idx2]))
  746. states[code1] = [idx1]
  747. return sorted(DFG, key=lambda x: x[1]), states
  748. elif root_node.type in if_statement:
  749. DFG = []
  750. current_states = states.copy()
  751. others_states = []
  752. flag = False
  753. tag = False
  754. if 'else' in root_node.type:
  755. tag = True
  756. for child in root_node.children:
  757. if 'else' in child.type:
  758. tag = True
  759. if child.type not in if_statement and flag is False:
  760. temp, current_states = DFG_go(child, index_to_code, current_states)
  761. DFG += temp
  762. else:
  763. flag = True
  764. temp, new_states = DFG_go(child, index_to_code, states)
  765. DFG += temp
  766. others_states.append(new_states)
  767. others_states.append(current_states)
  768. if tag is False:
  769. others_states.append(states)
  770. new_states = {}
  771. for dic in others_states:
  772. for key in dic:
  773. if key not in new_states:
  774. new_states[key] = dic[key].copy()
  775. else:
  776. new_states[key] += dic[key]
  777. for key in states:
  778. if key not in new_states:
  779. new_states[key] = states[key]
  780. else:
  781. new_states[key] += states[key]
  782. for key in new_states:
  783. new_states[key] = sorted(list(set(new_states[key])))
  784. return sorted(DFG, key=lambda x: x[1]), new_states
  785. elif root_node.type in for_statement:
  786. DFG = []
  787. for child in root_node.children:
  788. temp, states = DFG_go(child, index_to_code, states)
  789. DFG += temp
  790. flag = False
  791. for child in root_node.children:
  792. if flag:
  793. temp, states = DFG_go(child, index_to_code, states)
  794. DFG += temp
  795. elif child.type == "for_clause":
  796. if child.child_by_field_name('update') is not None:
  797. temp, states = DFG_go(child.child_by_field_name('update'), index_to_code, states)
  798. DFG += temp
  799. flag = True
  800. dic = {}
  801. for x in DFG:
  802. if (x[0], x[1], x[2]) not in dic:
  803. dic[(x[0], x[1], x[2])] = [x[3], x[4]]
  804. else:
  805. dic[(x[0], x[1], x[2])][0] = list(set(dic[(x[0], x[1], x[2])][0] + x[3]))
  806. dic[(x[0], x[1], x[2])][1] = sorted(list(set(dic[(x[0], x[1], x[2])][1] + x[4])))
  807. DFG = [(x[0], x[1], x[2], y[0], y[1]) for x, y in sorted(dic.items(), key=lambda t: t[0][1])]
  808. return sorted(DFG, key=lambda x: x[1]), states
  809. else:
  810. DFG = []
  811. for child in root_node.children:
  812. if child.type in do_first_statement:
  813. temp, states = DFG_go(child, index_to_code, states)
  814. DFG += temp
  815. for child in root_node.children:
  816. if child.type not in do_first_statement:
  817. temp, states = DFG_go(child, index_to_code, states)
  818. DFG += temp
  819. return sorted(DFG, key=lambda x: x[1]), states
  820. def DFG_php(root_node, index_to_code, states):
  821. assignment = ['assignment_expression', 'augmented_assignment_expression']
  822. def_statement = ['simple_parameter']
  823. increment_statement = ['update_expression']
  824. if_statement = ['if_statement', 'else_clause']
  825. for_statement = ['for_statement']
  826. enhanced_for_statement = ['foreach_statement']
  827. while_statement = ['while_statement']
  828. do_first_statement = []
  829. states = states.copy()
  830. if (len(root_node.children) == 0 or root_node.type == 'string') and root_node.type != 'comment':
  831. idx, code = index_to_code[(root_node.start_point, root_node.end_point)]
  832. if root_node.type == code:
  833. return [], states
  834. elif code in states:
  835. return [(code, idx, 'comesFrom', [code], states[code].copy())], states
  836. else:
  837. if root_node.type == 'identifier':
  838. states[code] = [idx]
  839. return [(code, idx, 'comesFrom', [], [])], states
  840. elif root_node.type in def_statement:
  841. name = root_node.child_by_field_name('name')
  842. value = root_node.child_by_field_name('default_value')
  843. DFG = []
  844. if value is None:
  845. indexs = tree_to_variable_index(name, index_to_code)
  846. for index in indexs:
  847. idx, code = index_to_code[index]
  848. DFG.append((code, idx, 'comesFrom', [], []))
  849. states[code] = [idx]
  850. return sorted(DFG, key=lambda x: x[1]), states
  851. else:
  852. name_indexs = tree_to_variable_index(name, index_to_code)
  853. value_indexs = tree_to_variable_index(value, index_to_code)
  854. temp, states = DFG_php(value, index_to_code, states)
  855. DFG += temp
  856. for index1 in name_indexs:
  857. idx1, code1 = index_to_code[index1]
  858. for index2 in value_indexs:
  859. idx2, code2 = index_to_code[index2]
  860. DFG.append((code1, idx1, 'comesFrom', [code2], [idx2]))
  861. states[code1] = [idx1]
  862. return sorted(DFG, key=lambda x: x[1]), states
  863. elif root_node.type in assignment:
  864. left_nodes = root_node.child_by_field_name('left')
  865. right_nodes = root_node.child_by_field_name('right')
  866. DFG = []
  867. temp, states = DFG_php(right_nodes, index_to_code, states)
  868. DFG += temp
  869. name_indexs = tree_to_variable_index(left_nodes, index_to_code)
  870. value_indexs = tree_to_variable_index(right_nodes, index_to_code)
  871. for index1 in name_indexs:
  872. idx1, code1 = index_to_code[index1]
  873. for index2 in value_indexs:
  874. idx2, code2 = index_to_code[index2]
  875. DFG.append((code1, idx1, 'computedFrom', [code2], [idx2]))
  876. states[code1] = [idx1]
  877. return sorted(DFG, key=lambda x: x[1]), states
  878. elif root_node.type in increment_statement:
  879. DFG = []
  880. indexs = tree_to_variable_index(root_node, index_to_code)
  881. for index1 in indexs:
  882. idx1, code1 = index_to_code[index1]
  883. for index2 in indexs:
  884. idx2, code2 = index_to_code[index2]
  885. DFG.append((code1, idx1, 'computedFrom', [code2], [idx2]))
  886. states[code1] = [idx1]
  887. return sorted(DFG, key=lambda x: x[1]), states
  888. elif root_node.type in if_statement:
  889. DFG = []
  890. current_states = states.copy()
  891. others_states = []
  892. flag = False
  893. tag = False
  894. if 'else' in root_node.type:
  895. tag = True
  896. for child in root_node.children:
  897. if 'else' in child.type:
  898. tag = True
  899. if child.type not in if_statement and flag is False:
  900. temp, current_states = DFG_php(child, index_to_code, current_states)
  901. DFG += temp
  902. else:
  903. flag = True
  904. temp, new_states = DFG_php(child, index_to_code, states)
  905. DFG += temp
  906. others_states.append(new_states)
  907. others_states.append(current_states)
  908. new_states = {}
  909. for dic in others_states:
  910. for key in dic:
  911. if key not in new_states:
  912. new_states[key] = dic[key].copy()
  913. else:
  914. new_states[key] += dic[key]
  915. for key in states:
  916. if key not in new_states:
  917. new_states[key] = states[key]
  918. else:
  919. new_states[key] += states[key]
  920. for key in new_states:
  921. new_states[key] = sorted(list(set(new_states[key])))
  922. return sorted(DFG, key=lambda x: x[1]), new_states
  923. elif root_node.type in for_statement:
  924. DFG = []
  925. for child in root_node.children:
  926. temp, states = DFG_php(child, index_to_code, states)
  927. DFG += temp
  928. flag = False
  929. for child in root_node.children:
  930. if flag:
  931. temp, states = DFG_php(child, index_to_code, states)
  932. DFG += temp
  933. elif child.type == "assignment_expression":
  934. flag = True
  935. dic = {}
  936. for x in DFG:
  937. if (x[0], x[1], x[2]) not in dic:
  938. dic[(x[0], x[1], x[2])] = [x[3], x[4]]
  939. else:
  940. dic[(x[0], x[1], x[2])][0] = list(set(dic[(x[0], x[1], x[2])][0] + x[3]))
  941. dic[(x[0], x[1], x[2])][1] = sorted(list(set(dic[(x[0], x[1], x[2])][1] + x[4])))
  942. DFG = [(x[0], x[1], x[2], y[0], y[1]) for x, y in sorted(dic.items(), key=lambda t: t[0][1])]
  943. return sorted(DFG, key=lambda x: x[1]), states
  944. elif root_node.type in enhanced_for_statement:
  945. name = None
  946. value = None
  947. for child in root_node.children:
  948. if child.type == 'variable_name' and value is None:
  949. value = child
  950. elif child.type == 'variable_name' and name is None:
  951. name = child
  952. break
  953. body = root_node.child_by_field_name('body')
  954. DFG = []
  955. for i in range(2):
  956. temp, states = DFG_php(value, index_to_code, states)
  957. DFG += temp
  958. name_indexs = tree_to_variable_index(name, index_to_code)
  959. value_indexs = tree_to_variable_index(value, index_to_code)
  960. for index1 in name_indexs:
  961. idx1, code1 = index_to_code[index1]
  962. for index2 in value_indexs:
  963. idx2, code2 = index_to_code[index2]
  964. DFG.append((code1, idx1, 'computedFrom', [code2], [idx2]))
  965. states[code1] = [idx1]
  966. temp, states = DFG_php(body, index_to_code, states)
  967. DFG += temp
  968. dic = {}
  969. for x in DFG:
  970. if (x[0], x[1], x[2]) not in dic:
  971. dic[(x[0], x[1], x[2])] = [x[3], x[4]]
  972. else:
  973. dic[(x[0], x[1], x[2])][0] = list(set(dic[(x[0], x[1], x[2])][0] + x[3]))
  974. dic[(x[0], x[1], x[2])][1] = sorted(list(set(dic[(x[0], x[1], x[2])][1] + x[4])))
  975. DFG = [(x[0], x[1], x[2], y[0], y[1]) for x, y in sorted(dic.items(), key=lambda t: t[0][1])]
  976. return sorted(DFG, key=lambda x: x[1]), states
  977. elif root_node.type in while_statement:
  978. DFG = []
  979. for i in range(2):
  980. for child in root_node.children:
  981. temp, states = DFG_php(child, index_to_code, states)
  982. DFG += temp
  983. dic = {}
  984. for x in DFG:
  985. if (x[0], x[1], x[2]) not in dic:
  986. dic[(x[0], x[1], x[2])] = [x[3], x[4]]
  987. else:
  988. dic[(x[0], x[1], x[2])][0] = list(set(dic[(x[0], x[1], x[2])][0] + x[3]))
  989. dic[(x[0], x[1], x[2])][1] = sorted(list(set(dic[(x[0], x[1], x[2])][1] + x[4])))
  990. DFG = [(x[0], x[1], x[2], y[0], y[1]) for x, y in sorted(dic.items(), key=lambda t: t[0][1])]
  991. return sorted(DFG, key=lambda x: x[1]), states
  992. else:
  993. DFG = []
  994. for child in root_node.children:
  995. if child.type in do_first_statement:
  996. temp, states = DFG_php(child, index_to_code, states)
  997. DFG += temp
  998. for child in root_node.children:
  999. if child.type not in do_first_statement:
  1000. temp, states = DFG_php(child, index_to_code, states)
  1001. DFG += temp
  1002. return sorted(DFG, key=lambda x: x[1]), states
  1003. def DFG_javascript(root_node, index_to_code, states):
  1004. assignment = ['assignment_pattern', 'augmented_assignment_expression']
  1005. def_statement = ['variable_declarator']
  1006. increment_statement = ['update_expression']
  1007. if_statement = ['if_statement', 'else']
  1008. for_statement = ['for_statement']
  1009. enhanced_for_statement = []
  1010. while_statement = ['while_statement']
  1011. do_first_statement = []
  1012. states = states.copy()
  1013. if (len(root_node.children) == 0 or root_node.type == 'string') and root_node.type != 'comment':
  1014. idx, code = index_to_code[(root_node.start_point, root_node.end_point)]
  1015. if root_node.type == code:
  1016. return [], states
  1017. elif code in states:
  1018. return [(code, idx, 'comesFrom', [code], states[code].copy())], states
  1019. else:
  1020. if root_node.type == 'identifier':
  1021. states[code] = [idx]
  1022. return [(code, idx, 'comesFrom', [], [])], states
  1023. elif root_node.type in def_statement:
  1024. name = root_node.child_by_field_name('name')
  1025. value = root_node.child_by_field_name('value')
  1026. DFG = []
  1027. if value is None:
  1028. indexs = tree_to_variable_index(name, index_to_code)
  1029. for index in indexs:
  1030. idx, code = index_to_code[index]
  1031. DFG.append((code, idx, 'comesFrom', [], []))
  1032. states[code] = [idx]
  1033. return sorted(DFG, key=lambda x: x[1]), states
  1034. else:
  1035. name_indexs = tree_to_variable_index(name, index_to_code)
  1036. value_indexs = tree_to_variable_index(value, index_to_code)
  1037. temp, states = DFG_javascript(value, index_to_code, states)
  1038. DFG += temp
  1039. for index1 in name_indexs:
  1040. idx1, code1 = index_to_code[index1]
  1041. for index2 in value_indexs:
  1042. idx2, code2 = index_to_code[index2]
  1043. DFG.append((code1, idx1, 'comesFrom', [code2], [idx2]))
  1044. states[code1] = [idx1]
  1045. return sorted(DFG, key=lambda x: x[1]), states
  1046. elif root_node.type in assignment:
  1047. left_nodes = root_node.child_by_field_name('left')
  1048. right_nodes = root_node.child_by_field_name('right')
  1049. DFG = []
  1050. temp, states = DFG_javascript(right_nodes, index_to_code, states)
  1051. DFG += temp
  1052. name_indexs = tree_to_variable_index(left_nodes, index_to_code)
  1053. value_indexs = tree_to_variable_index(right_nodes, index_to_code)
  1054. for index1 in name_indexs:
  1055. idx1, code1 = index_to_code[index1]
  1056. for index2 in value_indexs:
  1057. idx2, code2 = index_to_code[index2]
  1058. DFG.append((code1, idx1, 'computedFrom', [code2], [idx2]))
  1059. states[code1] = [idx1]
  1060. return sorted(DFG, key=lambda x: x[1]), states
  1061. elif root_node.type in increment_statement:
  1062. DFG = []
  1063. indexs = tree_to_variable_index(root_node, index_to_code)
  1064. for index1 in indexs:
  1065. idx1, code1 = index_to_code[index1]
  1066. for index2 in indexs:
  1067. idx2, code2 = index_to_code[index2]
  1068. DFG.append((code1, idx1, 'computedFrom', [code2], [idx2]))
  1069. states[code1] = [idx1]
  1070. return sorted(DFG, key=lambda x: x[1]), states
  1071. elif root_node.type in if_statement:
  1072. DFG = []
  1073. current_states = states.copy()
  1074. others_states = []
  1075. flag = False
  1076. tag = False
  1077. if 'else' in root_node.type:
  1078. tag = True
  1079. for child in root_node.children:
  1080. if 'else' in child.type:
  1081. tag = True
  1082. if child.type not in if_statement and flag is False:
  1083. temp, current_states = DFG_javascript(child, index_to_code, current_states)
  1084. DFG += temp
  1085. else:
  1086. flag = True
  1087. temp, new_states = DFG_javascript(child, index_to_code, states)
  1088. DFG += temp
  1089. others_states.append(new_states)
  1090. others_states.append(current_states)
  1091. if tag is False:
  1092. others_states.append(states)
  1093. new_states = {}
  1094. for dic in others_states:
  1095. for key in dic:
  1096. if key not in new_states:
  1097. new_states[key] = dic[key].copy()
  1098. else:
  1099. new_states[key] += dic[key]
  1100. for key in states:
  1101. if key not in new_states:
  1102. new_states[key] = states[key]
  1103. else:
  1104. new_states[key] += states[key]
  1105. for key in new_states:
  1106. new_states[key] = sorted(list(set(new_states[key])))
  1107. return sorted(DFG, key=lambda x: x[1]), new_states
  1108. elif root_node.type in for_statement:
  1109. DFG = []
  1110. for child in root_node.children:
  1111. temp, states = DFG_javascript(child, index_to_code, states)
  1112. DFG += temp
  1113. flag = False
  1114. for child in root_node.children:
  1115. if flag:
  1116. temp, states = DFG_javascript(child, index_to_code, states)
  1117. DFG += temp
  1118. elif child.type == "variable_declaration":
  1119. flag = True
  1120. dic = {}
  1121. for x in DFG:
  1122. if (x[0], x[1], x[2]) not in dic:
  1123. dic[(x[0], x[1], x[2])] = [x[3], x[4]]
  1124. else:
  1125. dic[(x[0], x[1], x[2])][0] = list(set(dic[(x[0], x[1], x[2])][0] + x[3]))
  1126. dic[(x[0], x[1], x[2])][1] = sorted(list(set(dic[(x[0], x[1], x[2])][1] + x[4])))
  1127. DFG = [(x[0], x[1], x[2], y[0], y[1]) for x, y in sorted(dic.items(), key=lambda t: t[0][1])]
  1128. return sorted(DFG, key=lambda x: x[1]), states
  1129. elif root_node.type in while_statement:
  1130. DFG = []
  1131. for i in range(2):
  1132. for child in root_node.children:
  1133. temp, states = DFG_javascript(child, index_to_code, states)
  1134. DFG += temp
  1135. dic = {}
  1136. for x in DFG:
  1137. if (x[0], x[1], x[2]) not in dic:
  1138. dic[(x[0], x[1], x[2])] = [x[3], x[4]]
  1139. else:
  1140. dic[(x[0], x[1], x[2])][0] = list(set(dic[(x[0], x[1], x[2])][0] + x[3]))
  1141. dic[(x[0], x[1], x[2])][1] = sorted(list(set(dic[(x[0], x[1], x[2])][1] + x[4])))
  1142. DFG = [(x[0], x[1], x[2], y[0], y[1]) for x, y in sorted(dic.items(), key=lambda t: t[0][1])]
  1143. return sorted(DFG, key=lambda x: x[1]), states
  1144. else:
  1145. DFG = []
  1146. for child in root_node.children:
  1147. if child.type in do_first_statement:
  1148. temp, states = DFG_javascript(child, index_to_code, states)
  1149. DFG += temp
  1150. for child in root_node.children:
  1151. if child.type not in do_first_statement:
  1152. temp, states = DFG_javascript(child, index_to_code, states)
  1153. DFG += temp
  1154. return sorted(DFG, key=lambda x: x[1]), states