visualize_5_6.py 3.6 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061
  1. def ma(df="dataframe", lfc=None, ct_count=None, st_count=None, basemean=None, pv=None, lfc_thr=(1, 1), pv_thr=0.05,
  2. valpha=1, dotsize=8,markerdot="o", dim=(6, 5), r=300, show=False, color=("green", "grey", "red"), ar=0,
  3. figtype='png',axtickfontsize=9, axtickfontname="Arial", axlabelfontsize=9, axlabelfontname="Arial",
  4. axxlabel=None, axylabel=None, xlm=None, ylm=None, fclines=False, fclinescolor='#2660a4', legendpos='best',
  5. figname='ma', legendanchor=None, legendlabels=['significant up', 'not significant', 'significant down'],
  6. plotlegend=False, theme=None, geneid=None, genenames=None, gfont=8, gstyle=1, title=None):
  7. _x, _y = 'A', 'M'
  8. assert General.check_for_nonnumeric(df[lfc]) == 0, 'dataframe contains non-numeric values in lfc column'
  9. if ct_count and st_count:
  10. assert General.check_for_nonnumeric(df[ct_count]) == 0, \
  11. 'dataframe contains non-numeric values in ct_count column'
  12. assert General.check_for_nonnumeric(
  13. df[st_count]) == 0, 'dataframe contains non-numeric values in ct_count column'
  14. if basemean:
  15. assert General.check_for_nonnumeric(df[basemean]) == 0, \
  16. 'dataframe contains non-numeric values in basemean column'
  17. # this is important to check if color or A exists and drop them as if you run multiple times same command
  18. # it may update old instance of df
  19. df = df.drop(['color_add_axy', 'A_add_axy'], axis=1, errors='ignore')
  20. assert len(set(color)) == 3, 'unique color must be size of 3'
  21. df.loc[(df[lfc] >= lfc_thr[0]) & (df[pv] < pv_thr), 'color_add_axy'] = color[0] # upregulated
  22. df.loc[(df[lfc] <= -lfc_thr[1]) & (df[pv] < pv_thr), 'color_add_axy'] = color[2] # downregulated
  23. df['color_add_axy'].fillna(color[1], inplace=True) # intermediate
  24. if basemean:
  25. # basemean (mean of normalized counts from DESeq2 results)
  26. df['A_add_axy'] = df[basemean]
  27. else:
  28. df['A_add_axy'] = (np.log2(df[ct_count]) + np.log2(df[st_count])) / 2
  29. # plot
  30. assign_values = {col: i for i, col in enumerate(color)}
  31. color_result_num = [assign_values[i] for i in df['color_add_axy']]
  32. assert len(
  33. set(color_result_num)) == 3, 'either significant or non-significant genes are missing; try to change lfc_thr' \
  34. ' to include both significant and non-significant genes'
  35. if theme:
  36. General.style_bg(theme)
  37. plt.subplots(figsize=dim)
  38. if plotlegend:
  39. s = plt.scatter(df['A_add_axy'], df[lfc], c=color_result_num, cmap=ListedColormap(color),
  40. alpha=valpha, s=dotsize, marker=markerdot)
  41. assert len(legendlabels) == 3, 'legendlabels must be size of 3'
  42. plt.legend(handles=s.legend_elements()[0], labels=legendlabels, loc=legendpos,
  43. bbox_to_anchor=legendanchor)
  44. else:
  45. plt.scatter(df['A_add_axy'], df[lfc], c=color_result_num, cmap=ListedColormap(color),
  46. alpha=valpha, s=dotsize, marker=markerdot)
  47. # draw a central line at M=0
  48. plt.axhline(y=0, color='#7d7d7d', linestyle='--')
  49. # draw lfc threshold lines
  50. if fclines:
  51. plt.axhline(y=lfc_thr[0], color=fclinescolor, linestyle='--')
  52. plt.axhline(y=-lfc_thr[1], color=fclinescolor, linestyle='--')
  53. if axxlabel:
  54. _x = axxlabel
  55. if axylabel:
  56. _y = axylabel
  57. GeneExpression.geneplot_ma(df, geneid, lfc, lfc_thr, genenames, gfont, gstyle)
  58. General.axis_labels(_x, _y, axlabelfontsize, axlabelfontname)
  59. General.axis_ticks(xlm, ylm, axtickfontsize, axtickfontname, ar)
  60. General.get_figure(show, r, figtype, figname, theme, title)