DB2MongoNew.py 8.8 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236
  1. # -*-coding:utf-8-*-
  2. import MySQLdb
  3. import pandas as pd
  4. import numpy as np
  5. import requests
  6. import json
  7. import datetime
  8. import sys
  9. # DB_IP = '101.37.175.111'
  10. # DB_USER = 'mooctest'
  11. # DB_PWD = 'secr3t!'
  12. # DB_NAME = 'mooctest_main_dev'
  13. # USER_DB_NAME = 'mooctest_user'
  14. # MONGO_URL = 'http://114.55.91.27:80/testGraph/evaluationData'
  15. # MONGO_AUTH = 'Basic YWRtaW46MTIzdHNldGNvb20='
  16. DIMENSION = ['eva_runEfficiency', 'eva_branch', 'eva_mutation', 'eva_codeEfficiency','eva_maintainability']
  17. CASE_IDS = set() # 记录case_id
  18. EXAM_NAME = ''
  19. def read_data_from_db(exam_id):
  20. global EXAM_NAME
  21. # 打开数据库连接
  22. db = MySQLdb.connect(DB_IP, DB_USER, DB_PWD, DB_NAME, charset='utf8')
  23. cursor = db.cursor()
  24. query_exam_sql = ('select begin_time,name from `exam` where id={}').format(exam_id)
  25. cursor.execute(query_exam_sql)
  26. exam_data = cursor.fetchone()
  27. millis = exam_data[0].timestamp()*1000
  28. EXAM_NAME = exam_data[1]
  29. query_best_sql = ('select uu.score, uu.type, uu.worker_id, uu.case_id, min(up.upload_time) from `upload_record` up ,'+
  30. '(select max(g.score) as score, g.type as type, ur.worker_id as worker_id, ur.case_id as case_id '+
  31. 'from `upload_record` ur ' +
  32. 'INNER JOIN `grade_general` g on ur.id=g.upload_id '+
  33. 'where ur.exam_id= {} '+
  34. 'GROUP BY g.type, ur.worker_id, ur.case_id) uu '+
  35. 'where uu.worker_id = up.worker_id and uu.case_id=up.case_id and up.exam_id={} '+
  36. 'GROUP BY uu.worker_id, uu.case_id, uu.type, uu.score').format(exam_id, exam_id)
  37. cursor.execute(query_best_sql)
  38. grade_data = cursor.fetchall()
  39. eva_dict = {}
  40. for score, typ, wid, cid, utime in grade_data:
  41. CASE_IDS.add(str(cid))
  42. key = str(cid)+'_'+str(wid)
  43. value = eva_dict.get(key, {})
  44. if typ == 'branch':
  45. value['eva_branch'] = score
  46. value['eva_codeEfficiency'] = (float(utime)-millis)/(1000*60)
  47. elif typ == 'mutation':
  48. value['eva_mutation'] = score
  49. eva_dict[key] = value
  50. query_latest_sql = ('select grade.score,mid.type, mid.case_id, mid.worker_id from grade_general grade, '+
  51. '(select max(gg.id) gid, up.worker_id, up.case_id, gg.type from grade_general gg INNER JOIN upload_record up '+
  52. 'on gg.upload_id = up.id '+
  53. 'where up.exam_id={} and gg.type in ("run_time","maintainability") '+
  54. 'GROUP BY up.worker_id, up.case_id, gg.type) mid '+
  55. 'where grade.id = mid.gid').format(exam_id)
  56. cursor.execute(query_latest_sql)
  57. grade_data = cursor.fetchall()
  58. for score, typ, cid, wid in grade_data:
  59. CASE_IDS.add(str(cid))
  60. key = str(cid)+'_'+str(wid)
  61. value = eva_dict.get(key, {})
  62. if typ == 'run_time':
  63. value['eva_runEfficiency'] = score
  64. elif typ == 'maintainability':
  65. value['eva_maintainability'] = score
  66. eva_dict[key] = value
  67. eva_arr = []
  68. for key in eva_dict:
  69. eva_item = []
  70. print(key, ": ", eva_dict[key])
  71. eva_item.append(exam_id) # exam_id
  72. eva_item.append(key.split('_')[0]) # case_id
  73. eva_item.append(key.split('_')[1]) # worker_id
  74. value = eva_dict[key]
  75. for eva_type in DIMENSION:
  76. if eva_type in value:
  77. eva_item.append(value[eva_type])
  78. else:
  79. eva_item.append(None)
  80. eva_arr.append(eva_item)
  81. db.close()
  82. return pd.DataFrame(eva_arr)
  83. # query_sql = ('select case_id, worker_id, type, score from `grade_general` where exam_id={} and type in ("'+('","'.join(DIMENSION))+'");').format(exam_id)
  84. # print(query_sql)
  85. # cursor.execute(query_sql)
  86. # eva_data = cursor.fetchall()
  87. # eva_dict = {}
  88. # for cid, wid, eva_type, score in eva_data:
  89. # CASE_IDS.add(str(cid))
  90. # key = str(cid)+'_'+str(wid)
  91. # if key in eva_dict:
  92. # value = eva_dict[key]
  93. # else:
  94. # value = {}
  95. # value[eva_type] = score
  96. # eva_dict[key] = value
  97. def calculate_score(data):
  98. # 用均值填充缺少的checkstyle数据
  99. data = data.fillna(data.mean()[7])
  100. print('-------init data--------\n',data)
  101. #分case处理
  102. dfArray =[]
  103. cont = 0
  104. # print(CASE_IDS)
  105. for i in CASE_IDS:
  106. matrix = np.array(data[data[1].isin([i])], dtype=np.float32)
  107. # 运行时间
  108. matrix[:, 3] = np.log1p(matrix[:, 4] / matrix[:, 3])
  109. # 最高覆盖率时间clear
  110. matrix[:, 6] = np.log1p(matrix[:, 4] / (matrix[:, 6] + 480))
  111. # 代码可维护性
  112. matrix[:, 7] = np.log1p(matrix[:, 4] / matrix[:, 7])
  113. # bug检测率
  114. matrix[:, 5] = np.log1p(matrix[:, 5])
  115. matrix[:, 3:8] = 100 * (matrix[:, 3:8] - matrix[:, 3:8].min(axis=0)) / (matrix[:, 3:8].max(axis=0) - matrix[:, 3:8].min(axis=0))
  116. dfArray.append(pd.DataFrame(matrix,columns=['taskId','caseId','workerId',"a"+ str(cont),"b"+str(cont),"c"+str(cont),"d"+ str(cont),"e"+str(cont)]))
  117. cont += 1
  118. print("-------------------d\n",dfArray)
  119. df = pd.merge(dfArray[0], dfArray[1], how='outer', on=['taskId','workerId'])
  120. df = df.fillna(0)
  121. df["a1"] = (df["a0"] + df["a1"])/2
  122. df["b1"] = (df["b0"] + df["b1"])/2
  123. df["c1"] = (df["c0"] + df["c1"])/2
  124. df["e1"] = (df["e0"] + df["e1"])/2
  125. df["d1"] = (df["d0"] + df["d1"])/2
  126. df = df.loc[:, ['workerId', 'a1', 'b1', 'c1', 'd1', 'e1']]
  127. result = np.array(df)
  128. result[:,1:6] = 100 * (result[:,1:6] - result[:,1:6].min(axis=0)) / (result[:,1:6].max(axis=0) - result[:,1:6].min(axis=0))
  129. # print(result.max(axis=0))
  130. result = np.around(result, decimals=2)
  131. print('----------result-------------\n', result)
  132. exam_id = int(data[0][0])
  133. delete_exist_data(exam_id)
  134. write2mongo(result.tolist(), exam_id)
  135. def delete_exist_data(exam_id):
  136. headers = {'content-type': 'application/json', 'Authorization': MONGO_AUTH}
  137. query_url = MONGO_URL+'/*?filter={"examId":'+str(exam_id)+'}'
  138. # print(query_url)
  139. # 先删除之前的数据
  140. query_ret = requests.delete(query_url,headers=headers)
  141. if query_ret.status_code == 200:
  142. print('----delete exists data----\n',json.loads(query_ret.text))
  143. else:
  144. print('----delete code----\n',query_ret.status_code)
  145. # 若collection不存在则创建
  146. requests.put(MONGO_URL,headers = headers)
  147. def write2mongo(data, exam_id):
  148. print('----write data-----')
  149. # 连接mooctest_user
  150. user_db = MySQLdb.connect(DB_IP, DB_USER, DB_PWD, USER_DB_NAME, charset='utf8')
  151. user_cursor = user_db.cursor()
  152. # 连接mooctest_main
  153. main_db = MySQLdb.connect(DB_IP, DB_USER, DB_PWD, DB_NAME, charset='utf8')
  154. main_cursor = main_db.cursor()
  155. body = []
  156. for item in data:
  157. print(item)
  158. # 获取学生姓名
  159. item_worker_id = int(float(item[0]))
  160. query_worker = ('select name from `user` where id = {}').format(item_worker_id)
  161. user_cursor.execute(query_worker)
  162. item_worker_name = user_cursor.fetchone()[0]
  163. # 获取当前学生排名
  164. query_ranking = ('SELECT u.rank, u.score FROM '+
  165. '(SELECT participant_id, score, (@ranknum :=@ranknum + 1) AS rank FROM submit_record, (SELECT(@ranknum := 0)) b WHERE exam_id={} ORDER BY score DESC ) u '+
  166. 'WHERE u.participant_id = {};').format(exam_id, item_worker_id)
  167. main_cursor.execute(query_ranking)
  168. rank_data = main_cursor.fetchone()
  169. one_body = {
  170. 'examId': exam_id,
  171. 'examName': EXAM_NAME,
  172. 'ranking': int(float(rank_data[0])),
  173. 'totalScore': rank_data[1],
  174. 'workerId': item_worker_id,
  175. 'workerName': item_worker_name,
  176. 'graphData':[
  177. {'key':'runEfficiency', 'value':item[1], 'min':0, 'max':100},
  178. {'key':'branch', 'value':item[2], 'min':0, 'max':100},
  179. {'key':'mutation', 'value':item[3], 'min':0, 'max':100},
  180. {'key':'codeEfficiency', 'value':item[4], 'min':0, 'max':100},
  181. {'key':'maintainability', 'value':item[5], 'min':0, 'max':100},
  182. ]
  183. }
  184. body.append(one_body)
  185. user_db.close()
  186. main_db.close()
  187. url = MONGO_URL
  188. headers = {'content-type': 'application/json', 'Authorization': MONGO_AUTH}
  189. ret = requests.post(url, json=body, headers=headers)
  190. if ret.status_code == 200:
  191. print(json.loads(ret.text))
  192. else:
  193. print(ret.status_code, "save failed!")
  194. DB_IP = sys.argv[2]
  195. DB_USER = sys.argv[3]
  196. DB_PWD = sys.argv[4]
  197. DB_NAME = sys.argv[5]
  198. MONGO_URL = sys.argv[6]
  199. MONGO_AUTH = sys.argv[7]
  200. USER_DB_NAME = sys.argv[8]
  201. DIMENSION = ['eva_runEfficiency', 'eva_branch', 'eva_mutation', 'eva_codeEfficiency','eva_maintainability']
  202. data = read_data_from_db(sys.argv[1])
  203. calculate_score(data)