tutorial.py 10 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272
  1. import csv
  2. import os
  3. import re
  4. import shutil
  5. def del_create_analytics_folder():
  6. # del the analytics folder including subfolder
  7. # mkdir the analytics folder (only mkdir)
  8. if os.path.exists('analytics'):
  9. shutil.rmtree('analytics')
  10. if not os.path.exists('analytics'):
  11. os.mkdir('analytics')
  12. def course():
  13. if not os.path.exists('analytics'):
  14. os.mkdir('analytics')
  15. if os.path.exists('analytics/course'):
  16. shutil.rmtree('analytics/course')
  17. d = {'01':'btech',
  18. '11':'mtech',
  19. '21':'phd',
  20. '12':'msc'}
  21. with open('studentinfo_cs384.csv', newline='') as csvfile:
  22. reader = csv.DictReader(csvfile)
  23. if not os.path.exists('analytics/course'):
  24. os.mkdir('analytics/course')
  25. for row in reader:
  26. if len(row)==0:
  27. print(1)
  28. continue
  29. l = list(row.values())
  30. head = list(row.keys())
  31. stream = str(row['id'][-4:-2]).lower()
  32. yr = str(row['id'][:2])
  33. if str(row['id'][2:4]) in list(d.keys()):
  34. degree = d[str(row['id'][2:4])]
  35. else:
  36. with open('analytics/course/' + 'misc.csv' , mode = 'a') as f:
  37. f_write = csv.writer(f, delimiter=',',lineterminator='\r')
  38. if os.path.getsize('analytics/course/' + 'misc.csv')==0:
  39. f_write.writerow(head)
  40. f_write.writerow(l)
  41. f.close()
  42. continue
  43. csv_name = f'{yr}_{stream}_{degree}.csv'
  44. p = re.compile(r'\d\d\d\d\D\D\d\d')
  45. k = re.fullmatch(p,row['id'])
  46. if k:
  47. if not os.path.exists('analytics/course/'+ stream):
  48. os.mkdir('analytics/course/'+ stream)
  49. if not os.path.exists('analytics/course/'+ stream + '/' + degree):
  50. os.mkdir('analytics/course/'+ stream + '/' + degree )
  51. with open('analytics/course/'+ stream + '/' + degree + '/' + csv_name , mode = 'a') as f:
  52. f_write = csv.writer(f, delimiter=',',lineterminator='\r')
  53. if os.path.getsize('analytics/course/'+ stream + '/' + degree + '/' + csv_name)==0:
  54. f_write.writerow(head)
  55. f_write.writerow(l)
  56. f.close()
  57. else:
  58. with open('analytics/course/' + 'misc.csv' , mode = 'a') as f:
  59. f_write = csv.writer(f, delimiter=',',lineterminator='\r')
  60. if os.path.getsize('analytics/course/' + 'misc.csv')==0:
  61. f_write.writerow(head)
  62. f_write.writerow(l)
  63. f.close()
  64. csvfile.close()
  65. def country():
  66. if not os.path.exists('analytics'):
  67. os.mkdir('analytics')
  68. if os.path.exists('analytics/country'):
  69. shutil.rmtree('analytics/country')
  70. with open('studentinfo_cs384.csv', newline='') as csvfile:
  71. reader = csv.DictReader(csvfile)
  72. if not os.path.exists('analytics/country'):
  73. os.mkdir('analytics/country')
  74. for row in reader:
  75. l = list(row.values())
  76. head = list(row.keys())
  77. with open('analytics/country/'+row['country'].lower()+ '.csv', mode = 'a') as f:
  78. f_write = csv.writer(f, delimiter=',',lineterminator='\r')
  79. if os.path.getsize('analytics/country/'+row['country'].lower() + '.csv')==0:
  80. f_write.writerow(head)
  81. f_write.writerow(l)
  82. f.close()
  83. csvfile.close()
  84. def email_domain_extract():
  85. if not os.path.exists('analytics'):
  86. os.mkdir('analytics')
  87. if os.path.exists('analytics/email'):
  88. shutil.rmtree('analytics/email')
  89. with open('studentinfo_cs384.csv', newline='') as csvfile:
  90. reader = csv.DictReader(csvfile)
  91. if not os.path.exists('analytics/email'):
  92. os.mkdir('analytics/email')
  93. for row in reader:
  94. l = list(row.values())
  95. head = list(row.keys())
  96. if '@' in row['email'] and '.' in row['email']:
  97. domain = row['email'].split('@')[1].split('.')[0]
  98. with open('analytics/email/'+domain+ '.csv', mode = 'a') as f:
  99. f_write = csv.writer(f, delimiter=',',lineterminator='\r')
  100. if os.path.getsize('analytics/email/'+ domain + '.csv')==0:
  101. f_write.writerow(head)
  102. f_write.writerow(l)
  103. f.close()
  104. else:
  105. with open('analytics/email/'+'misc'+ '.csv', mode = 'a') as f:
  106. f_write = csv.writer(f, delimiter=',',lineterminator='\r')
  107. if os.path.getsize('analytics/email/'+ domain + '.csv')==0:
  108. f_write.writerow(head)
  109. f_write.writerow(l)
  110. f.close()
  111. csvfile.close()
  112. def gender():
  113. if not os.path.exists('analytics'):
  114. os.mkdir('analytics')
  115. if os.path.exists('analytics/gender'):
  116. shutil.rmtree('analytics/gender')
  117. with open('studentinfo_cs384.csv', newline='') as csvfile:
  118. reader = csv.DictReader(csvfile)
  119. if not os.path.exists('analytics/gender'):
  120. os.mkdir('analytics/gender')
  121. for row in reader:
  122. l = list(row.values())
  123. head = list(row.keys())
  124. gender = row['gender'].lower()
  125. with open('analytics/gender/'+gender+ '.csv', mode = 'a') as f:
  126. f_write = csv.writer(f, delimiter=',',lineterminator='\r')
  127. if os.path.getsize('analytics/gender/'+ gender + '.csv')==0:
  128. f_write.writerow(head)
  129. f_write.writerow(l)
  130. f.close()
  131. csvfile.close()
  132. def dob():
  133. if not os.path.exists('analytics'):
  134. os.mkdir('analytics')
  135. if os.path.exists('analytics/dob'):
  136. shutil.rmtree('analytics/dob')
  137. with open('studentinfo_cs384.csv', newline='') as csvfile:
  138. reader = csv.DictReader(csvfile)
  139. if not os.path.exists('analytics/dob'):
  140. os.mkdir('analytics/dob')
  141. for row in reader:
  142. l = list(row.values())
  143. head = list(row.keys())
  144. x = str(re.sub(r"\D","-",row['dob']))
  145. yr = int(x.split('-')[-1])
  146. k = int(yr)%10
  147. if k>4:
  148. name = 'bday_' + str(yr - k + 5) + '_' + str(yr - k + 9)
  149. else:
  150. name = 'bday_' + str(yr - k ) + '_' + str(yr - k + 4)
  151. if yr > 2014:
  152. name = 'bday_2015_2020'
  153. with open('analytics/dob/'+name+ '.csv', mode = 'a') as f:
  154. f_write = csv.writer(f, delimiter=',',lineterminator='\r')
  155. if os.path.getsize('analytics/dob/'+name+ '.csv')==0:
  156. f_write.writerow(head)
  157. f_write.writerow(l)
  158. f.close()
  159. def state():
  160. if not os.path.exists('analytics'):
  161. os.mkdir('analytics')
  162. if os.path.exists('analytics/state'):
  163. shutil.rmtree('analytics/state')
  164. with open('studentinfo_cs384.csv', newline='') as csvfile:
  165. reader = csv.DictReader(csvfile)
  166. if not os.path.exists('analytics/state'):
  167. os.mkdir('analytics/state')
  168. for row in reader:
  169. l = list(row.values())
  170. head = list(row.keys())
  171. with open('analytics/state/'+row['state'].lower()+ '.csv', mode = 'a') as f:
  172. f_write = csv.writer(f, delimiter=',',lineterminator='\r')
  173. if os.path.getsize('analytics/state/'+row['state'].lower() + '.csv')==0:
  174. f_write.writerow(head)
  175. f_write.writerow(l)
  176. f.close()
  177. csvfile.close()
  178. def blood_group():
  179. if not os.path.exists('analytics'):
  180. os.mkdir('analytics')
  181. if os.path.exists('analytics/blood_group'):
  182. shutil.rmtree('analytics/blood_group')
  183. with open('studentinfo_cs384.csv', newline='') as csvfile:
  184. reader = csv.DictReader(csvfile)
  185. if not os.path.exists('analytics/blood_group'):
  186. os.mkdir('analytics/blood_group')
  187. for row in reader:
  188. l = list(row.values())
  189. head = list(row.keys())
  190. with open('analytics/blood_group/'+row['blood_group']+ '.csv', mode = 'a') as f:
  191. f_write = csv.writer(f, delimiter=',',lineterminator='\r')
  192. if os.path.getsize('analytics/blood_group/'+row['blood_group'] + '.csv')==0:
  193. f_write.writerow(head)
  194. f_write.writerow(l)
  195. f.close()
  196. csvfile.close()
  197. # Create the new file here and also sort it in this function only.
  198. def new_file_sort():
  199. if not os.path.exists('analytics'):
  200. os.mkdir('analytics')
  201. new = []
  202. head = []
  203. with open('studentinfo_cs384.csv', newline='') as csvfile:
  204. reader = csv.DictReader(csvfile)
  205. for row in reader:
  206. head = list(row.keys())
  207. del head[1]
  208. head.insert(1,'first_name')
  209. head.insert(2,'last_name')
  210. k = list(row.values())
  211. del k[1]
  212. k.insert(1,row['full_name'].split()[0])
  213. k.insert(2,' '.join(row['full_name'].split()[1:]))
  214. new.append(k)
  215. csvfile.close()
  216. with open('analytics/studentinfo_cs384_names_split.csv', newline='',mode='w') as f:
  217. f_write = csv.writer(f, delimiter=',',lineterminator='\r')
  218. f_write.writerow(head)
  219. for i in new:
  220. f_write.writerow(i)
  221. f.close()
  222. #sorting
  223. dic = {}
  224. for i in new:
  225. dic[i[1]]='#$%^&*'.join(i)
  226. new = []
  227. with open('analytics/studentinfo_cs384_names_split_sorted_first_name.csv', mode = 'w') as f:
  228. print
  229. f.close()
  230. for i in sorted(dic.items()):
  231. new.append(i[1].split('#$%^&*'))
  232. with open('analytics/studentinfo_cs384_names_split_sorted_first_name.csv', mode = 'a') as f:
  233. f_write = csv.writer(f, delimiter=',',lineterminator='\r')
  234. f_write.writerow(head)
  235. for i in new:
  236. f_write.writerow(i)
  237. f.close()
# if __name__ == "__main__":
#     del_create_analytics_folder()
#     course()
#     blood_group()
#     new_file_sort()
#     state()
#     email_domain_extract()
#     state()
#     gender()
#     dob()
  247. # dob()