visualize_5_4.py 3.2 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152
  1. def volcano(df="dataframe", lfc=None, pv=None, lfc_thr=(1, 1), pv_thr=(0.05, 0.05), color=("green", "grey", "red"),
  2. valpha=1, geneid=None, genenames=None, gfont=8, dim=(5, 5), r=300, ar=90, dotsize=8, markerdot="o",
  3. sign_line=False, gstyle=1, show=False, figtype='png', axtickfontsize=9,
  4. axtickfontname="Arial", axlabelfontsize=9, axlabelfontname="Arial", axxlabel=None,
  5. axylabel=None, xlm=None, ylm=None, plotlegend=False, legendpos='best',
  6. figname='volcano', legendanchor=None,
  7. legendlabels=['significant up', 'not significant', 'significant down'], theme=None):
  8. _x = r'$ log_{2}(Fold Change)$'
  9. _y = r'$ -log_{10}(P-value)$'
  10. color = color
  11. # check if dataframe contains any non-numeric character
  12. assert general.check_for_nonnumeric(df[lfc]) == 0, 'dataframe contains non-numeric values in lfc column'
  13. assert general.check_for_nonnumeric(df[pv]) == 0, 'dataframe contains non-numeric values in pv column'
  14. # this is important to check if color or logpv exists and drop them as if you run multiple times same command
  15. # it may update old instance of df
  16. df = df.drop(['color_add_axy', 'logpv_add_axy'], axis=1, errors='ignore')
  17. assert len(set(color)) == 3, 'unique color must be size of 3'
  18. df.loc[(df[lfc] >= lfc_thr[0]) & (df[pv] < pv_thr[0]), 'color_add_axy'] = color[0] # upregulated
  19. df.loc[(df[lfc] <= -lfc_thr[1]) & (df[pv] < pv_thr[1]), 'color_add_axy'] = color[2] # downregulated
  20. df['color_add_axy'].fillna(color[1], inplace=True) # intermediate
  21. df['logpv_add_axy'] = -(np.log10(df[pv]))
  22. # plot
  23. assign_values = {col: i for i, col in enumerate(color)}
  24. color_result_num = [assign_values[i] for i in df['color_add_axy']]
  25. assert len(set(color_result_num)) == 3, \
  26. 'either significant or non-significant genes are missing; try to change lfc_thr or pv_thr to include ' \
  27. 'both significant and non-significant genes'
  28. if theme == 'dark':
  29. general.dark_bg()
  30. plt.subplots(figsize=dim)
  31. if plotlegend:
  32. s = plt.scatter(df[lfc], df['logpv_add_axy'], c=color_result_num, cmap=ListedColormap(color), alpha=valpha,
  33. s=dotsize, marker=markerdot)
  34. assert len(legendlabels) == 3, 'legendlabels must be size of 3'
  35. plt.legend(handles=s.legend_elements()[0], labels=legendlabels, loc=legendpos, bbox_to_anchor=legendanchor)
  36. else:
  37. plt.scatter(df[lfc], df['logpv_add_axy'], c=color_result_num, cmap=ListedColormap(color), alpha=valpha,
  38. s=dotsize, marker=markerdot)
  39. if sign_line:
  40. plt.axhline(y=-np.log10(pv_thr[0]), linestyle='--', color='#7d7d7d', linewidth=1)
  41. plt.axvline(x=lfc_thr[0], linestyle='--', color='#7d7d7d', linewidth=1)
  42. plt.axvline(x=-lfc_thr[1], linestyle='--', color='#7d7d7d', linewidth=1)
  43. GeneExpression.gene_plot(df, geneid, lfc, lfc_thr, pv_thr, genenames, gfont, pv, gstyle)
  44. if axxlabel:
  45. _x = axxlabel
  46. if axylabel:
  47. _y = axylabel
  48. general.axis_labels(_x, _y, axlabelfontsize, axlabelfontname)
  49. general.axis_ticks(xlm, ylm, axtickfontsize, axtickfontname, ar)
  50. general.get_figure(show, r, figtype, figname, theme)