123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235 |
def singlebar(df='dataframe', dim=(6, 4), bw=0.4, colorbar='#f2aa4cff', hbsize=4, r=300, ar=(0, 0), valphabar=1,
              errorbar=True, show=False, ylm=None, axtickfontsize=9, axtickfontname='Arial', ax_x_ticklabel=None,
              axlabelfontsize=9, axlabelfontname='Arial', yerrlw=None, yerrcw=None, axxlabel=None, axylabel=None,
              figtype='png', add_sign_line=False, pv=None,
              sign_line_opts={'symbol': '*', 'fontsize': 9, 'linewidth': 0.5, 'arrowstyle': '-', 'fontname':'Arial'},
              sign_line_pvals=False,
              add_sign_symbol=False, sign_symbol_opts={'symbol': '*', 'fontsize': 9, 'rotation':0, 'fontname':'Arial'},
              sign_line_pairs=None, sub_cat=None, sub_cat_opts={'y_neg_dist': 3.5, 'fontsize': 9, 'fontname':'Arial'},
              sub_cat_label_dist=None, symb_dist=None, group_let=None, df_format=None, samp_col_name=None,
              col_order=False, dotplot=False, dotsize=6, colordot=['#101820ff'], valphadot=1, markerdot='o',
              sign_line_pairs_dist=None, sign_line_pv_symb_dist=None, div_fact=20, add_text=None,
              figname='singlebar', connectionstyle='bar, armA=50, armB=50, angle=180, fraction=0',
              std_errs_vis='both', yerrzorder=8, theme=None):
    """Draw a bar chart of per-sample means with optional SEM error bars and annotations.

    Data layout
    -----------
    df : pandas.DataFrame
        Wide format (default): one column per sample; bar height is the column
        mean and the error is the column SEM.
        Stacked format (``df_format='stack'``): rows are grouped by
        ``samp_col_name``; the first value column of the grouped means/SEMs is
        plotted.
    df_format, samp_col_name, col_order
        ``samp_col_name`` is required for stacked data. When ``col_order`` is
        truthy the bars follow the order of first appearance in
        ``df[samp_col_name]`` instead of the group-by order.

    Bars, error bars and axes
    -------------------------
    dim, bw, colorbar, valphabar
        Figure size, bar width, bar colour(s) and bar alpha.
    errorbar, std_errs_vis, hbsize, yerrlw, yerrcw, yerrzorder
        Error-bar switch, which side(s) to draw (``'upper'``, ``'lower'`` or
        ``'both'``), cap size, line width, cap thickness and z-order.
    ax_x_ticklabel, axtickfontsize, axtickfontname, ar
        Custom x tick labels (sample names are used when falsy), tick font, and
        ``ar=(x_rotation, y_rotation)``.
    axxlabel, axylabel, axlabelfontsize, axlabelfontname
        Axis labels and their font, forwarded to ``general.axis_labels``.
    ylm
        Optional ``(start, end, interval)`` for the y limits and y ticks.

    Overlays
    --------
    dotplot, dotsize, colordot, valphadot, markerdot
        Scatter the individual observations over each bar, spread evenly
        across the bar width. ``colordot`` may be a single colour (string or
        one-element sequence) or one colour per sample.
    add_sign_line, pv, sign_line_opts
        Connect bars (0,1), (2,3), ... with a line and label it with
        ``general.pvalue_symbol(pv[k], symbol)`` for the k-th pair. The label
        is raised by ``sign_line_opts['dist_y_pos']`` when given, otherwise by
        ``max(bar height) / div_fact``.
    sign_line_pairs, sign_line_pairs_dist, sign_line_pv_symb_dist, sign_line_pvals
        Explicit ``(i, j)`` bar index pairs to connect. ``pv`` is indexed by
        pair position. The two ``*_dist`` sequences add per-pair vertical
        offsets to the line and to the label respectively. With
        ``sign_line_pvals`` the raw p value is printed instead of a symbol.
    add_sign_symbol, sign_symbol_opts, symb_dist, group_let
        Put ``group_let[i]`` (if ``group_let`` is a list) and/or the p-value
        symbol for ``pv[i]`` above bar ``i``; ``symb_dist`` adds per-bar
        vertical offsets.
    sub_cat, sub_cat_opts, sub_cat_label_dist
        ``sub_cat`` maps ``(first_bar_x, last_bar_x)`` tuples to a label drawn
        on a bracket line below the axis.
    add_text
        ``[x, y, text]`` free text placed with ``plt.text``.
    div_fact
        Divisor of the tallest bar height; the quotient is the base spacing
        unit used for every annotation offset.
    connectionstyle
        Matplotlib connection style used for the significance lines.

    Output
    ------
    show, r, figtype, figname, theme
        Forwarded to ``general.get_figure`` (and ``theme == 'dark'`` calls
        ``general.dark_bg`` first). NOTE(review): presumably these control
        display vs. saving, resolution, file type and file name -- confirm
        against ``general.get_figure``.

    Raises
    ------
    ValueError
        If stacked data is requested without ``samp_col_name`` or
        ``std_errs_vis`` is not one of the three accepted values.
    KeyError
        If a ``sub_cat`` key is not a 2-tuple.
    """
    plt.rcParams['mathtext.fontset'] = 'custom'
    plt.rcParams['mathtext.default'] = 'regular'
    plt.rcParams['mathtext.it'] = 'Arial:italic'
    plt.rcParams['mathtext.bf'] = 'Arial:italic:bold'

    # Layer caller-supplied options over the documented defaults so a partial
    # dict (e.g. only {'symbol': '#'}) no longer raises KeyError. New dicts are
    # built, so the shared default objects in the signature are never mutated.
    sign_line_opts = {'symbol': '*', 'fontsize': 9, 'linewidth': 0.5, 'arrowstyle': '-', 'fontname': 'Arial',
                      **(sign_line_opts or {})}
    sign_symbol_opts = {'symbol': '*', 'fontsize': 9, 'rotation': 0, 'fontname': 'Arial',
                        **(sign_symbol_opts or {})}
    sub_cat_opts = {'y_neg_dist': 3.5, 'fontsize': 9, 'fontname': 'Arial', **(sub_cat_opts or {})}

    # ---- summary statistics -------------------------------------------------
    is_stacked = df_format == 'stack'
    value_col = None
    if is_stacked:
        if samp_col_name is None:
            raise ValueError('sample column name required')
        grouped = df.groupby(samp_col_name)
        # rows = value columns, columns = sample names
        df_mean = grouped.mean().T
        df_sem = grouped.sem().T
        if col_order:
            first_seen = df[samp_col_name].unique()
            df_mean = df_mean[first_seen]
            df_sem = df_sem[first_seen]
        bar_h = df_mean.iloc[0]
        bar_se = df_sem.iloc[0]
        sample_list = df_mean.columns.to_numpy()
        # name of the value column actually plotted; needed for the dot overlay
        value_col = df_mean.index[0]
        lowest = df_mean.iloc[0].min()
    else:
        bar_h = df.describe().loc['mean']
        bar_se = df.sem()
        sample_list = df.columns.to_numpy()
        lowest = min(df.min())
    # Lowest point the sub-category bracket is measured from (never above zero).
    min_value = lowest if lowest < 0 else 0

    # Plain arrays for positional access; used by every annotation below so
    # that wide and stacked inputs are treated the same way.
    bar_means = bar_h.to_numpy()
    bar_sems = bar_se.to_numpy()

    if std_errs_vis == 'upper':
        std_errs_vis = [len(bar_se)*[0], bar_se]
    elif std_errs_vis == 'lower':
        std_errs_vis = [bar_se, len(bar_se)*[0]]
    elif std_errs_vis == 'both':
        std_errs_vis = bar_se
    else:
        raise ValueError('In valid value for the std_errs_vis')

    # ---- bars and axes ------------------------------------------------------
    xbar = np.arange(len(sample_list))
    if theme == 'dark':
        general.dark_bg()
    plt.subplots(figsize=dim)
    if errorbar:
        plt.bar(x=xbar, height=bar_h, yerr=std_errs_vis, width=bw, color=colorbar,
                capsize=hbsize, alpha=valphabar, zorder=5,
                error_kw={'elinewidth': yerrlw, 'capthick': yerrcw, 'zorder': yerrzorder})
    else:
        plt.bar(x=xbar, height=bar_h, width=bw, color=colorbar, capsize=hbsize, alpha=valphabar)

    x_ticklabel = ax_x_ticklabel if ax_x_ticklabel else sample_list
    plt.xticks(ticks=xbar, labels=x_ticklabel, fontsize=axtickfontsize, rotation=ar[0], fontname=axtickfontname)
    _x = axxlabel if axxlabel else None
    _y = axylabel if axylabel else None
    general.axis_labels(_x, _y, axlabelfontsize, axlabelfontname)
    # ylm must be tuple of start, end, interval
    if ylm:
        plt.ylim(bottom=ylm[0], top=ylm[1])
        plt.yticks(np.arange(ylm[0], ylm[1], ylm[2]), fontsize=axtickfontsize, fontname=axtickfontname)
    plt.yticks(fontsize=axtickfontsize, rotation=ar[1], fontname=axtickfontname)

    # ---- individual observations -------------------------------------------
    # A bare string is one colour, not a sequence of single-character colours.
    color_list_dot = [colordot] if isinstance(colordot, str) else colordot
    if len(color_list_dot) == 1:
        color_list_dot = list(color_list_dot) * len(sample_list)
    if dotplot:
        for idx, sample in enumerate(sample_list):
            if is_stacked:
                observed = df.loc[df[samp_col_name] == sample, value_col].dropna()
            else:
                observed = df[df.columns[idx]].dropna()
            # one x position per non-missing observation, spread over the bar width
            plt.scatter(
                x=np.linspace(xbar[idx] - bw / 2, xbar[idx] + bw / 2, len(observed)),
                y=observed, s=dotsize, color=color_list_dot[idx], zorder=10, alpha=valphadot,
                marker=markerdot)

    # Base spacing unit for all annotation offsets.
    size_factor_to_start_line = max(bar_h) / div_fact

    def _draw_sign_line(x_left, x_right, y_top, label, label_offset):
        # Draw the connector between two bars and centre its label above it.
        plt.annotate('', xy=(x_left, y_top), xytext=(x_right, y_top),
                     arrowprops={'connectionstyle': connectionstyle,
                                 'arrowstyle': sign_line_opts['arrowstyle'],
                                 'linewidth': sign_line_opts['linewidth']})
        plt.annotate(label, xy=(np.mean([x_left, x_right]), y_top + label_offset),
                     fontsize=sign_line_opts['fontsize'], ha="center")

    # ---- significance lines for adjacent pairs (0,1), (2,3), ... -----------
    if add_sign_line:
        # Stop before a trailing unpaired bar instead of indexing past the end.
        for i in range(0, len(xbar) - 1, 2):
            x_pos = xbar[i]
            x_pos_2 = xbar[i + 1]
            y_pos = bar_means[i] + bar_sems[i]
            y_pos_2 = bar_means[i + 1] + bar_sems[i + 1]
            # only if y axis is positive
            if y_pos > 0:
                y_pos += size_factor_to_start_line
                y_pos_2 += size_factor_to_start_line
                pv_symb = general.pvalue_symbol(pv[i // 2], sign_line_opts['symbol'])
                if pv_symb:
                    # 'dist_y_pos' is optional; fall back to the base spacing unit
                    _draw_sign_line(x_pos, x_pos_2, max(y_pos, y_pos_2), pv_symb,
                                    sign_line_opts.get('dist_y_pos', size_factor_to_start_line))

    # ---- significance lines for explicit pairs (one control, many treatments)
    # Line heights already used, keyed by the first index of each pair.
    y_pos_dict_trt = dict()
    if sign_line_pairs:
        for p_index, pair in enumerate(sign_line_pairs):
            first, second = pair[0], pair[1]
            y_pos_adj = 0
            x_pos = xbar[first]
            x_pos_2 = xbar[second]
            y_pos = bar_means[first] + bar_sems[first]
            y_pos_2 = bar_means[second] + bar_sems[second]
            # only if y axis is positive
            if y_pos > 0:
                y_pos += size_factor_to_start_line/2
                y_pos_2 += size_factor_to_start_line/2
                if first in y_pos_dict_trt:
                    y_pos_adj = 1
                    prev_y, prev_y_2 = y_pos_dict_trt[first]
                    # Lift this line above the previous one drawn from the same
                    # first bar, e.g. when 0-1 sits higher than 0-2 would.
                    if y_pos_2 <= prev_y_2:
                        if sign_line_pairs_dist:
                            y_pos_2 += (prev_y_2 - y_pos_2) + (3 * size_factor_to_start_line) + \
                                sign_line_pairs_dist[p_index]
                        else:
                            y_pos_2 += (prev_y_2 - y_pos_2) + (3 * size_factor_to_start_line)
                    elif y_pos <= prev_y:
                        if sign_line_pairs_dist:
                            y_pos += 3 * size_factor_to_start_line + sign_line_pairs_dist[p_index]
                        else:
                            y_pos += 3 * size_factor_to_start_line
                    # Bars of similar height: add distance so lines do not overlap.
                    if 0.75 < bar_means[first]/bar_means[second] < 1.25:
                        if sign_line_pairs_dist:
                            y_pos += 2 * size_factor_to_start_line + sign_line_pairs_dist[p_index]
                        else:
                            y_pos += 2 * size_factor_to_start_line
                if y_pos_adj == 0 and sign_line_pairs_dist:
                    if y_pos >= y_pos_2:
                        y_pos += sign_line_pairs_dist[p_index]
                    else:
                        y_pos_2 += sign_line_pairs_dist[p_index]
                # sign_line_pvals passed, used p values instead of symbols
                if sign_line_pvals:
                    pv_symb = r'$\it{p}$' + str(pv[p_index])
                else:
                    pv_symb = general.pvalue_symbol(pv[p_index], sign_line_opts['symbol'])
                y_pos_dict_trt[first] = [y_pos, y_pos_2]
                if pv_symb:
                    # the per-pair label offset is optional
                    if sign_line_pv_symb_dist is not None:
                        label_offset = size_factor_to_start_line + sign_line_pv_symb_dist[p_index]
                    else:
                        label_offset = size_factor_to_start_line
                    _draw_sign_line(x_pos, x_pos_2, max(y_pos, y_pos_2), pv_symb, label_offset)

    # ---- per-bar symbols / group letters -----------------------------------
    if add_sign_symbol:
        for i in xbar:
            x_pos = xbar[i]
            y_pos = bar_means[i] + bar_sems[i] + size_factor_to_start_line
            if symb_dist:
                y_pos += symb_dist[i]
            # annotations are only placed above positive bars
            if y_pos <= 0:
                continue
            if isinstance(group_let, list):
                plt.annotate(group_let[i], xy=(x_pos, y_pos),
                             fontsize=sign_symbol_opts['fontsize'], ha="center",
                             rotation=sign_symbol_opts['rotation'], fontfamily=sign_symbol_opts['fontname'])
            if pv:
                pv_symb = general.pvalue_symbol(pv[i], sign_symbol_opts['symbol'])
                if pv_symb:
                    plt.annotate(pv_symb, xy=(x_pos, y_pos), fontsize=sign_symbol_opts['fontsize'], ha="center",
                                 rotation=sign_symbol_opts['rotation'], fontfamily=sign_symbol_opts['fontname'])

    # ---- sub-category brackets below the axis ------------------------------
    if sub_cat and isinstance(sub_cat, dict):
        use_label_dist = bool(sub_cat_label_dist) and isinstance(sub_cat_label_dist, list)
        cat_y_pos = min_value - (sub_cat_opts['y_neg_dist']*size_factor_to_start_line)
        for sub_cat_i, k in enumerate(sub_cat):
            if not (isinstance(k, tuple) and len(k) == 2):
                raise KeyError("Sub category keys must be tuple of size 2")
            cat_x_pos, cat_x_pos_2 = k
            plt.annotate('', xy=(cat_x_pos-(bw/2), cat_y_pos), xytext=(cat_x_pos_2+(bw/2), cat_y_pos),
                         arrowprops={'arrowstyle': '-', 'linewidth': 0.5}, annotation_clip=False)
            label_y = cat_y_pos - size_factor_to_start_line
            if use_label_dist:
                label_y -= sub_cat_label_dist[sub_cat_i]
            plt.annotate(sub_cat[k], xy=(np.mean([cat_x_pos, cat_x_pos_2]), label_y),
                         ha="center", fontsize=sub_cat_opts['fontsize'], annotation_clip=False,
                         fontfamily=sub_cat_opts['fontname'])

    if isinstance(add_text, list):
        plt.text(add_text[0], add_text[1], add_text[2], fontsize=9, fontfamily='Arial')

    general.get_figure(show, r, figtype, figname, theme)
|