1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950 |
def psdnmyz_3(seg_path='/home/arasan/testrep/psmd/jureca/psmd_seg2_vols.csv',
              rnd_path='/home/arasan/testrep/psmd/jureca/TOTAL_rnd_temp.csv',
              out_path='/home/arasan/testrep/psmd/jureca/bordeaux_packet3/psmd_seg2_vols.csv'):
    """Write a pseudonymized copy of the segmentation-volumes CSV.

    Reads the table at ``seg_path`` and the table at ``rnd_path``, takes the
    ``RNDNAME`` column of the latter as the replacement identifier, and
    writes a CSV with exactly the columns ``NAME, AGE, SEX, ICV`` to
    ``out_path`` (no index column), where ``NAME`` holds the ``RNDNAME``
    values.

    The two tables are paired purely by row position.
    NOTE(review): this assumes both files list subjects in the same order;
    nothing in this function verifies that -- confirm against how
    ``rnd_path`` was generated.

    Args:
        seg_path: CSV with at least the columns ``AGE``, ``SEX`` and ``ICV``.
        rnd_path: CSV with a ``RNDNAME`` column, one row per row of
            ``seg_path``.
        out_path: Destination of the pseudonymized CSV.

    Raises:
        ValueError: if the two input tables have different row counts.
        KeyError: if ``AGE``, ``SEX`` or ``ICV`` is missing from ``seg_path``.
        AttributeError: if ``rnd_path`` has no ``RNDNAME`` column.
    """
    seg_df = pd.read_csv(seg_path)
    metrnd = pd.read_csv(rnd_path)

    # Positional pairing only makes sense when both tables have the same
    # number of rows; fail early with a readable message instead of the
    # generic pandas length error.
    if len(seg_df) != len(metrnd):
        raise ValueError(
            'row count mismatch: {0} has {1} rows but {2} has {3}'.format(
                seg_path, len(seg_df), rnd_path, len(metrnd)))

    # Build the output from scratch instead of renaming RNDNAME -> NAME on
    # the input frame: if the input already carries a NAME (or ID) column,
    # a rename would yield duplicate NAME columns and the original
    # identifiers could end up in the exported file.
    pseudonymized = seg_df[['AGE', 'SEX', 'ICV']].copy()
    pseudonymized.insert(0, 'NAME', metrnd.RNDNAME.values)

    pseudonymized.to_csv(out_path, index=False)
|