123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272 |
- import csv
- import os
- import re
- import shutil
def del_create_analytics_folder():
    """Reset the ``analytics`` folder in the current working directory.

    An existing ``analytics`` tree is removed together with everything
    below it; afterwards a new ``analytics`` directory is created (only the
    top-level folder, no subfolders).
    """
    target = 'analytics'
    had_folder = os.path.exists(target)
    if had_folder:
        shutil.rmtree(target)
    if os.path.exists(target):
        # Still present after the removal step: nothing to create.
        return
    os.mkdir(target)
def course():
    """Split ``studentinfo_cs384.csv`` into one CSV file per course.

    The input is read from the current working directory and
    ``analytics/course`` is rebuilt from scratch.  The ``id`` column is
    expected to look like ``YYDDSSNN``: two digits of year, a two-digit
    degree code (``01`` btech, ``11`` mtech, ``21`` phd, ``12`` msc), a
    two-letter stream code and two trailing digits.  Such rows are appended
    to ``analytics/course/<stream>/<degree>/<YY>_<stream>_<degree>.csv``
    with the stream lower-cased.

    Rows whose id has an unknown degree code or does not match that layout
    are appended to ``analytics/course/misc.csv``.  Every output file starts
    with the input header row.

    Raises:
        FileNotFoundError: if ``studentinfo_cs384.csv`` does not exist.
        KeyError: if the input has no ``id`` column.
    """
    degree_by_code = {'01': 'btech', '11': 'mtech', '21': 'phd', '12': 'msc'}
    # The stream code is restricted to letters: it becomes a directory and
    # file name, so characters such as '/' must not slip through.
    id_pattern = re.compile(r'\d{4}[A-Za-z]{2}\d{2}')
    out_root = 'analytics/course'

    def append_row(path, header, values):
        # Append one record, writing the header first if the file is empty.
        with open(path, mode='a', newline='') as out:
            writer = csv.writer(out, delimiter=',', lineterminator='\r')
            if os.path.getsize(path) == 0:
                writer.writerow(header)
            writer.writerow(values)

    if not os.path.exists('analytics'):
        os.mkdir('analytics')
    if os.path.exists(out_root):
        shutil.rmtree(out_root)

    with open('studentinfo_cs384.csv', newline='') as csvfile:
        reader = csv.DictReader(csvfile)
        os.makedirs(out_root, exist_ok=True)
        for row in reader:
            header = list(row.keys())
            values = list(row.values())
            # A short row can leave the id as None; treat it as unclassifiable.
            student_id = row['id'] or ''
            degree = degree_by_code.get(student_id[2:4])
            if degree is None or not id_pattern.fullmatch(student_id):
                append_row(f'{out_root}/misc.csv', header, values)
                continue

            year = student_id[:2]
            stream = student_id[-4:-2].lower()
            target_dir = f'{out_root}/{stream}/{degree}'
            os.makedirs(target_dir, exist_ok=True)
            append_row(
                f'{target_dir}/{year}_{stream}_{degree}.csv', header, values
            )
def country():
    """Group the student records by country.

    Reads ``studentinfo_cs384.csv`` from the current working directory,
    recreates ``analytics/country`` and appends every row to
    ``analytics/country/<country>.csv``, where ``<country>`` is the
    lower-cased value of the ``country`` column.  A file that is still empty
    receives the header row before its first record.
    """
    base_dir = 'analytics'
    out_dir = 'analytics/country'
    if not os.path.exists(base_dir):
        os.mkdir(base_dir)
    if os.path.exists(out_dir):
        shutil.rmtree(out_dir)

    with open('studentinfo_cs384.csv', newline='') as source:
        records = csv.DictReader(source)
        if not os.path.exists(out_dir):
            os.mkdir(out_dir)
        for record in records:
            values = list(record.values())
            columns = list(record.keys())
            target = f"{out_dir}/{record['country'].lower()}.csv"
            with open(target, mode='a') as sink:
                writer = csv.writer(sink, delimiter=',', lineterminator='\r')
                # An empty file has not received its header yet.
                if os.path.getsize(target) == 0:
                    writer.writerows([columns, values])
                else:
                    writer.writerow(values)
def email_domain_extract():
    """Group the student records by e-mail domain.

    Reads ``studentinfo_cs384.csv`` from the current working directory,
    recreates ``analytics/email`` and appends every row to
    ``analytics/email/<domain>.csv``.  ``<domain>`` is the text between the
    first ``@`` and the following ``.`` in the ``email`` column.

    Rows go to ``analytics/email/misc.csv`` when the address has no ``@``,
    has no ``.``, or yields an empty domain.  Every output file starts with
    the input header row.

    Raises:
        FileNotFoundError: if ``studentinfo_cs384.csv`` does not exist.
        KeyError: if the input has no ``email`` column.
    """
    out_dir = 'analytics/email'
    if not os.path.exists('analytics'):
        os.mkdir('analytics')
    if os.path.exists(out_dir):
        shutil.rmtree(out_dir)

    with open('studentinfo_cs384.csv', newline='') as csvfile:
        reader = csv.DictReader(csvfile)
        os.makedirs(out_dir, exist_ok=True)
        for row in reader:
            header = list(row.keys())
            values = list(row.values())
            # A short row can leave the e-mail as None.
            email = row['email'] or ''

            domain = ''
            if '@' in email and '.' in email:
                domain = email.split('@')[1].split('.')[0]
            # An empty domain would otherwise produce a file named ".csv".
            bucket = domain or 'misc'

            # The emptiness check must look at the very file being written,
            # otherwise the header ends up missing or duplicated.
            path = f'{out_dir}/{bucket}.csv'
            with open(path, mode='a', newline='') as out:
                writer = csv.writer(out, delimiter=',', lineterminator='\r')
                if os.path.getsize(path) == 0:
                    writer.writerow(header)
                writer.writerow(values)
def gender():
    """Group the student records by gender.

    Reads ``studentinfo_cs384.csv`` from the current working directory,
    recreates ``analytics/gender`` and appends every row to
    ``analytics/gender/<gender>.csv``, using the lower-cased ``gender``
    column as the file name.  The header row is written once, when the
    target file is still empty.
    """
    root, bucket_dir = 'analytics', 'analytics/gender'
    if not os.path.exists(root):
        os.mkdir(root)
    if os.path.exists(bucket_dir):
        shutil.rmtree(bucket_dir)

    with open('studentinfo_cs384.csv', newline='') as infile:
        table = csv.DictReader(infile)
        if not os.path.exists(bucket_dir):
            os.mkdir(bucket_dir)
        for entry in table:
            cells = list(entry.values())
            titles = list(entry.keys())
            label = entry['gender'].lower()
            out_path = f'{bucket_dir}/{label}.csv'
            with open(out_path, mode='a') as outfile:
                out = csv.writer(outfile, delimiter=',', lineterminator='\r')
                is_new = os.path.getsize(out_path) == 0
                if is_new:
                    out.writerow(titles)
                out.writerow(cells)
def dob():
    """Group the student records into five-year birth-date buckets.

    Reads ``studentinfo_cs384.csv`` from the current working directory and
    recreates ``analytics/dob``.  The year is taken as the last run of
    digits in the ``dob`` column (every non-digit acts as a separator).
    Rows are appended to ``analytics/dob/bday_<start>_<end>.csv`` where
    ``<start>`` is the year rounded down to a multiple of five and ``<end>``
    is ``<start> + 4``.  Years after 2014 all go to ``bday_2015_2020.csv``.
    The header row is written when the target file is still empty.
    """
    if not os.path.exists('analytics'):
        os.mkdir('analytics')
    if os.path.exists('analytics/dob'):
        shutil.rmtree('analytics/dob')

    with open('studentinfo_cs384.csv', newline='') as source:
        records = csv.DictReader(source)
        if not os.path.exists('analytics/dob'):
            os.mkdir('analytics/dob')
        for record in records:
            values = list(record.values())
            columns = list(record.keys())

            # Splitting on each non-digit leaves the year as the last piece.
            year = int(re.split(r"\D", record['dob'])[-1])
            if year > 2014:
                bucket = 'bday_2015_2020'
            else:
                start = year - year % 5
                bucket = 'bday_' + str(start) + '_' + str(start + 4)

            target = f'analytics/dob/{bucket}.csv'
            with open(target, mode='a') as sink:
                writer = csv.writer(sink, delimiter=',', lineterminator='\r')
                if os.path.getsize(target) == 0:
                    writer.writerow(columns)
                writer.writerow(values)
-
def state():
    """Group the student records by state.

    Reads ``studentinfo_cs384.csv`` from the current working directory,
    recreates ``analytics/state`` and appends every row to
    ``analytics/state/<state>.csv``, where ``<state>`` is the lower-cased
    value of the ``state`` column.  The header row precedes the first record
    of each file.
    """
    folder = 'analytics/state'
    if not os.path.exists('analytics'):
        os.mkdir('analytics')
    if os.path.exists(folder):
        shutil.rmtree(folder)

    def store(path, columns, values):
        # Append one record; an empty file gets the header first.
        with open(path, mode='a') as handle:
            writer = csv.writer(handle, delimiter=',', lineterminator='\r')
            if os.path.getsize(path) == 0:
                writer.writerow(columns)
            writer.writerow(values)

    with open('studentinfo_cs384.csv', newline='') as students:
        reader = csv.DictReader(students)
        if not os.path.exists(folder):
            os.mkdir(folder)
        for student in reader:
            values = list(student.values())
            columns = list(student.keys())
            store(folder + '/' + student['state'].lower() + '.csv',
                  columns, values)
def blood_group():
    """Group the student records by blood group.

    Reads ``studentinfo_cs384.csv`` from the current working directory,
    recreates ``analytics/blood_group`` and appends every row to
    ``analytics/blood_group/<blood_group>.csv``.  The ``blood_group`` value
    is used for the file name exactly as it appears in the input (no case
    folding).  The header row is written when the target file is empty.
    """
    parent = 'analytics'
    dest = 'analytics/blood_group'
    if not os.path.exists(parent):
        os.mkdir(parent)
    if os.path.exists(dest):
        shutil.rmtree(dest)

    with open('studentinfo_cs384.csv', newline='') as src:
        rows = csv.DictReader(src)
        if not os.path.exists(dest):
            os.mkdir(dest)
        for item in rows:
            fields = list(item.values())
            names = list(item.keys())
            file_path = ''.join((dest, '/', item['blood_group'], '.csv'))
            with open(file_path, mode='a') as dst:
                emit = csv.writer(dst, delimiter=',', lineterminator='\r').writerow
                if not os.path.getsize(file_path):
                    emit(names)
                emit(fields)
- # Create the new file here and also sort it in this function only.
def new_file_sort():
    """Split ``full_name`` into two columns and write a sorted copy.

    Reads ``studentinfo_cs384.csv`` from the current working directory and
    writes two files into ``analytics`` (created if missing):

    * ``studentinfo_cs384_names_split.csv`` - the input rows in their
      original order, with the ``full_name`` column replaced by
      ``first_name`` (first whitespace-separated word) and ``last_name``
      (the remaining words joined by single spaces).
    * ``studentinfo_cs384_names_split_sorted_first_name.csv`` - the same
      rows sorted by ``first_name``.  Rows sharing a first name are all kept,
      in input order.

    An empty ``full_name`` yields empty first and last names.

    Raises:
        FileNotFoundError: if ``studentinfo_cs384.csv`` does not exist.
        KeyError: if a data row has no ``full_name`` column.
    """
    if not os.path.exists('analytics'):
        os.mkdir('analytics')

    split_rows = []
    with open('studentinfo_cs384.csv', newline='') as csvfile:
        reader = csv.DictReader(csvfile)
        # De-duplicated field names line up with the keys of each row dict.
        header = list(dict.fromkeys(reader.fieldnames or []))
        name_at = header.index('full_name') if 'full_name' in header else None
        if name_at is not None:
            header[name_at:name_at + 1] = ['first_name', 'last_name']

        for row in reader:
            words = (row['full_name'] or '').split()
            first_name = words[0] if words else ''
            last_name = ' '.join(words[1:])
            values = list(row.values())
            values[name_at:name_at + 1] = [first_name, last_name]
            split_rows.append(values)

    def write_table(path, rows):
        # Write the header followed by the given rows, replacing the file.
        with open(path, mode='w', newline='') as out:
            writer = csv.writer(out, delimiter=',', lineterminator='\r')
            writer.writerow(header)
            writer.writerows(rows)

    write_table('analytics/studentinfo_cs384_names_split.csv', split_rows)

    # sorted() is stable, so rows with equal first names keep input order
    # and none of them is lost.
    by_first_name = sorted(split_rows, key=lambda values: values[name_at])
    write_table(
        'analytics/studentinfo_cs384_names_split_sorted_first_name.csv',
        by_first_name,
    )
- #if __name__ == "__main__":
- # del_create_analytics_folder()
- # course()
- # blood_group()
- # new_file_sort()
- # state()
- # email_domain_extract()
- # state()
- # gender()
- # dob()
|