visualize_5_27.py 3.5 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879
  1. def mhat(df="dataframe", chr=None, pv=None, log_scale=True, color=None, dim=(6,4), r=300, ar=90, gwas_sign_line=False,
  2. gwasp=5E-08, dotsize=8, markeridcol=None, markernames=None, gfont=8, valpha=1, show=False, figtype='png',
  3. axxlabel=None, axylabel=None, axlabelfontsize=9, axlabelfontname="Arial", axtickfontsize=9,
  4. axtickfontname="Arial", ylm=None, gstyle=1, figname='manhattan', theme=None):
  5. _x, _y = 'Chromosomes', r'$ -log_{10}(P)$'
  6. rand_colors = ('#a7414a', '#282726', '#6a8a82', '#a37c27', '#563838', '#0584f2', '#f28a30', '#f05837',
  7. '#6465a5', '#00743f', '#be9063', '#de8cf0', '#888c46', '#c0334d', '#270101', '#8d2f23',
  8. '#ee6c81', '#65734b', '#14325c', '#704307', '#b5b3be', '#f67280', '#ffd082', '#ffd800',
  9. '#ad62aa', '#21bf73', '#a0855b', '#5edfff', '#08ffc8', '#ca3e47', '#c9753d', '#6c5ce7',
  10. '#a997df', '#513b56', '#590925', '#007fff', '#bf1363', '#f39237', '#0a3200', '#8c271e')
  11. if log_scale:
  12. # minus log10 of P-value
  13. df['tpval'] = -np.log10(df[pv])
  14. else:
  15. # for Fst values
  16. df['tpval'] = df[pv]
  17. # df = df.sort_values(chr)
  18. # if the column contains numeric strings
  19. df = df.loc[pd.to_numeric(df[chr], errors='coerce').sort_values().index]
  20. # add indices
  21. df['ind'] = range(len(df))
  22. df_group = df.groupby(chr)
  23. if color is not None and len(color) == 2:
  24. color_1 = int(df[chr].nunique() / 2) * [color[0]]
  25. color_2 = int(df[chr].nunique() / 2) * [color[1]]
  26. if df[chr].nunique() % 2 == 0:
  27. color_list = list(reduce(lambda x, y: x+y, zip(color_1, color_2)))
  28. elif df[chr].nunique() % 2 == 1:
  29. color_list = list(reduce(lambda x, y: x+y, zip(color_1, color_2)))
  30. color_list.append(color[0])
  31. elif color is not None and len(color) == df[chr].nunique():
  32. color_list = color
  33. elif color is None:
  34. # select colors randomly from the list based in number of chr
  35. color_list = sample(rand_colors, df[chr].nunique())
  36. else:
  37. print("Error: in color argument")
  38. sys.exit(1)
  39. xlabels = []
  40. xticks = []
  41. if theme == 'dark':
  42. general.dark_bg()
  43. fig, ax = plt.subplots(figsize=dim)
  44. i = 0
  45. for label, df1 in df.groupby(chr):
  46. df1.plot(kind='scatter', x='ind', y='tpval', color=color_list[i], s=dotsize, alpha=valpha, ax=ax)
  47. df1_max_ind = df1['ind'].iloc[-1]
  48. df1_min_ind = df1['ind'].iloc[0]
  49. xlabels.append(label)
  50. xticks.append((df1_max_ind - (df1_max_ind - df1_min_ind) / 2))
  51. i += 1
  52. # add GWAS significant line
  53. if gwas_sign_line is True:
  54. ax.axhline(y=-np.log10(gwasp), linestyle='--', color='#7d7d7d', linewidth=1)
  55. if markernames is not None:
  56. marker.geneplot_mhat(df, markeridcol, chr, pv, gwasp, markernames, gfont, gstyle, ax=ax)
  57. ax.margins(x=0)
  58. ax.margins(y=0)
  59. ax.set_xticks(xticks)
  60. if log_scale:
  61. ax.set_ylim([0, max(df['tpval'] + 1)])
  62. if ylm:
  63. ylm = np.arange(ylm[0], ylm[1], ylm[2])
  64. else:
  65. ylm = np.arange(0, max(df['tpval']+1), 1)
  66. ax.set_yticks(ylm)
  67. ax.set_xticklabels(xlabels, rotation=ar)
  68. # ax.set_yticklabels(ylm, fontsize=axtickfontsize, fontname=axtickfontname, rotation=ar)
  69. if axxlabel:
  70. _x = axxlabel
  71. if axylabel:
  72. _y = axylabel
  73. ax.set_xlabel(_x, fontsize=axlabelfontsize, fontname=axlabelfontname)
  74. ax.set_ylabel(_y, fontsize=axlabelfontsize, fontname=axlabelfontname)
  75. general.get_figure(show, r, figtype, figname, theme)