123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236 |
- # -*-coding:utf-8-*-
- import MySQLdb
- import pandas as pd
- import numpy as np
- import requests
- import json
- import datetime
- import sys
- # DB_IP = '101.37.175.111'
- # DB_USER = 'mooctest'
- # DB_PWD = 'secr3t!'
- # DB_NAME = 'mooctest_main_dev'
- # USER_DB_NAME = 'mooctest_user'
- # MONGO_URL = 'http://114.55.91.27:80/testGraph/evaluationData'
- # MONGO_AUTH = 'Basic YWRtaW46MTIzdHNldGNvb20='
- DIMENSION = ['eva_runEfficiency', 'eva_branch', 'eva_mutation', 'eva_codeEfficiency','eva_maintainability']
- CASE_IDS = set() # 记录case_id
- EXAM_NAME = ''
- def read_data_from_db(exam_id):
- global EXAM_NAME
- # 打开数据库连接
- db = MySQLdb.connect(DB_IP, DB_USER, DB_PWD, DB_NAME, charset='utf8')
- cursor = db.cursor()
- query_exam_sql = ('select begin_time,name from `exam` where id={}').format(exam_id)
- cursor.execute(query_exam_sql)
- exam_data = cursor.fetchone()
- millis = exam_data[0].timestamp()*1000
- EXAM_NAME = exam_data[1]
- query_best_sql = ('select uu.score, uu.type, uu.worker_id, uu.case_id, min(up.upload_time) from `upload_record` up ,'+
- '(select max(g.score) as score, g.type as type, ur.worker_id as worker_id, ur.case_id as case_id '+
- 'from `upload_record` ur ' +
- 'INNER JOIN `grade_general` g on ur.id=g.upload_id '+
- 'where ur.exam_id= {} '+
- 'GROUP BY g.type, ur.worker_id, ur.case_id) uu '+
- 'where uu.worker_id = up.worker_id and uu.case_id=up.case_id and up.exam_id={} '+
- 'GROUP BY uu.worker_id, uu.case_id, uu.type, uu.score').format(exam_id, exam_id)
-
- cursor.execute(query_best_sql)
- grade_data = cursor.fetchall()
- eva_dict = {}
- for score, typ, wid, cid, utime in grade_data:
- CASE_IDS.add(str(cid))
- key = str(cid)+'_'+str(wid)
- value = eva_dict.get(key, {})
- if typ == 'branch':
- value['eva_branch'] = score
- value['eva_codeEfficiency'] = (float(utime)-millis)/(1000*60)
- elif typ == 'mutation':
- value['eva_mutation'] = score
- eva_dict[key] = value
- query_latest_sql = ('select grade.score,mid.type, mid.case_id, mid.worker_id from grade_general grade, '+
- '(select max(gg.id) gid, up.worker_id, up.case_id, gg.type from grade_general gg INNER JOIN upload_record up '+
- 'on gg.upload_id = up.id '+
- 'where up.exam_id={} and gg.type in ("run_time","maintainability") '+
- 'GROUP BY up.worker_id, up.case_id, gg.type) mid '+
- 'where grade.id = mid.gid').format(exam_id)
- cursor.execute(query_latest_sql)
- grade_data = cursor.fetchall()
- for score, typ, cid, wid in grade_data:
- CASE_IDS.add(str(cid))
- key = str(cid)+'_'+str(wid)
- value = eva_dict.get(key, {})
- if typ == 'run_time':
- value['eva_runEfficiency'] = score
- elif typ == 'maintainability':
- value['eva_maintainability'] = score
- eva_dict[key] = value
-
- eva_arr = []
- for key in eva_dict:
- eva_item = []
- print(key, ": ", eva_dict[key])
- eva_item.append(exam_id) # exam_id
- eva_item.append(key.split('_')[0]) # case_id
- eva_item.append(key.split('_')[1]) # worker_id
- value = eva_dict[key]
- for eva_type in DIMENSION:
- if eva_type in value:
- eva_item.append(value[eva_type])
- else:
- eva_item.append(None)
- eva_arr.append(eva_item)
-
- db.close()
- return pd.DataFrame(eva_arr)
-
- # query_sql = ('select case_id, worker_id, type, score from `grade_general` where exam_id={} and type in ("'+('","'.join(DIMENSION))+'");').format(exam_id)
- # print(query_sql)
- # cursor.execute(query_sql)
- # eva_data = cursor.fetchall()
- # eva_dict = {}
- # for cid, wid, eva_type, score in eva_data:
- # CASE_IDS.add(str(cid))
- # key = str(cid)+'_'+str(wid)
- # if key in eva_dict:
- # value = eva_dict[key]
- # else:
- # value = {}
- # value[eva_type] = score
- # eva_dict[key] = value
-
-
- def calculate_score(data):
- # 用均值填充缺少的checkstyle数据
- data = data.fillna(data.mean()[7])
- print('-------init data--------\n',data)
- #分case处理
- dfArray =[]
- cont = 0
- # print(CASE_IDS)
- for i in CASE_IDS:
- matrix = np.array(data[data[1].isin([i])], dtype=np.float32)
- # 运行时间
- matrix[:, 3] = np.log1p(matrix[:, 4] / matrix[:, 3])
- # 最高覆盖率时间clear
- matrix[:, 6] = np.log1p(matrix[:, 4] / (matrix[:, 6] + 480))
- # 代码可维护性
- matrix[:, 7] = np.log1p(matrix[:, 4] / matrix[:, 7])
- # bug检测率
- matrix[:, 5] = np.log1p(matrix[:, 5])
- matrix[:, 3:8] = 100 * (matrix[:, 3:8] - matrix[:, 3:8].min(axis=0)) / (matrix[:, 3:8].max(axis=0) - matrix[:, 3:8].min(axis=0))
- dfArray.append(pd.DataFrame(matrix,columns=['taskId','caseId','workerId',"a"+ str(cont),"b"+str(cont),"c"+str(cont),"d"+ str(cont),"e"+str(cont)]))
- cont += 1
- print("-------------------d\n",dfArray)
- df = pd.merge(dfArray[0], dfArray[1], how='outer', on=['taskId','workerId'])
- df = df.fillna(0)
- df["a1"] = (df["a0"] + df["a1"])/2
- df["b1"] = (df["b0"] + df["b1"])/2
- df["c1"] = (df["c0"] + df["c1"])/2
- df["e1"] = (df["e0"] + df["e1"])/2
- df["d1"] = (df["d0"] + df["d1"])/2
- df = df.loc[:, ['workerId', 'a1', 'b1', 'c1', 'd1', 'e1']]
- result = np.array(df)
- result[:,1:6] = 100 * (result[:,1:6] - result[:,1:6].min(axis=0)) / (result[:,1:6].max(axis=0) - result[:,1:6].min(axis=0))
- # print(result.max(axis=0))
- result = np.around(result, decimals=2)
- print('----------result-------------\n', result)
- exam_id = int(data[0][0])
- delete_exist_data(exam_id)
- write2mongo(result.tolist(), exam_id)
- def delete_exist_data(exam_id):
- headers = {'content-type': 'application/json', 'Authorization': MONGO_AUTH}
- query_url = MONGO_URL+'/*?filter={"examId":'+str(exam_id)+'}'
- # print(query_url)
- # 先删除之前的数据
- query_ret = requests.delete(query_url,headers=headers)
- if query_ret.status_code == 200:
- print('----delete exists data----\n',json.loads(query_ret.text))
- else:
- print('----delete code----\n',query_ret.status_code)
- # 若collection不存在则创建
- requests.put(MONGO_URL,headers = headers)
- def write2mongo(data, exam_id):
- print('----write data-----')
- # 连接mooctest_user
- user_db = MySQLdb.connect(DB_IP, DB_USER, DB_PWD, USER_DB_NAME, charset='utf8')
- user_cursor = user_db.cursor()
- # 连接mooctest_main
- main_db = MySQLdb.connect(DB_IP, DB_USER, DB_PWD, DB_NAME, charset='utf8')
- main_cursor = main_db.cursor()
- body = []
- for item in data:
- print(item)
- # 获取学生姓名
- item_worker_id = int(float(item[0]))
- query_worker = ('select name from `user` where id = {}').format(item_worker_id)
- user_cursor.execute(query_worker)
- item_worker_name = user_cursor.fetchone()[0]
- # 获取当前学生排名
- query_ranking = ('SELECT u.rank, u.score FROM '+
- '(SELECT participant_id, score, (@ranknum :=@ranknum + 1) AS rank FROM submit_record, (SELECT(@ranknum := 0)) b WHERE exam_id={} ORDER BY score DESC ) u '+
- 'WHERE u.participant_id = {};').format(exam_id, item_worker_id)
- main_cursor.execute(query_ranking)
- rank_data = main_cursor.fetchone()
- one_body = {
- 'examId': exam_id,
- 'examName': EXAM_NAME,
- 'ranking': int(float(rank_data[0])),
- 'totalScore': rank_data[1],
- 'workerId': item_worker_id,
- 'workerName': item_worker_name,
- 'graphData':[
- {'key':'runEfficiency', 'value':item[1], 'min':0, 'max':100},
- {'key':'branch', 'value':item[2], 'min':0, 'max':100},
- {'key':'mutation', 'value':item[3], 'min':0, 'max':100},
- {'key':'codeEfficiency', 'value':item[4], 'min':0, 'max':100},
- {'key':'maintainability', 'value':item[5], 'min':0, 'max':100},
- ]
- }
- body.append(one_body)
- user_db.close()
- main_db.close()
- url = MONGO_URL
- headers = {'content-type': 'application/json', 'Authorization': MONGO_AUTH}
- ret = requests.post(url, json=body, headers=headers)
- if ret.status_code == 200:
- print(json.loads(ret.text))
- else:
- print(ret.status_code, "save failed!")
- DB_IP = sys.argv[2]
- DB_USER = sys.argv[3]
- DB_PWD = sys.argv[4]
- DB_NAME = sys.argv[5]
- MONGO_URL = sys.argv[6]
- MONGO_AUTH = sys.argv[7]
- USER_DB_NAME = sys.argv[8]
- DIMENSION = ['eva_runEfficiency', 'eva_branch', 'eva_mutation', 'eva_codeEfficiency','eva_maintainability']
- data = read_data_from_db(sys.argv[1])
- calculate_score(data)
|