LiuFan
/
GnnForPrivacyScan


			
				
					
						
						
							123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235
							def singlebar(df='dataframe', dim=(6, 4), bw=0.4, colorbar='#f2aa4cff', hbsize=4, r=300, ar=(0, 0), valphabar=1,
							              errorbar=True, show=False, ylm=None, axtickfontsize=9, axtickfontname='Arial', ax_x_ticklabel=None,
							              axlabelfontsize=9, axlabelfontname='Arial', yerrlw=None, yerrcw=None, axxlabel=None, axylabel=None,
							              figtype='png', add_sign_line=False, pv=None,
							              sign_line_opts={'symbol': '*', 'fontsize': 9, 'linewidth': 0.5, 'arrowstyle': '-', 'fontname':'Arial'},
							              sign_line_pvals=False,
							              add_sign_symbol=False, sign_symbol_opts={'symbol': '*', 'fontsize': 9, 'rotation':0, 'fontname':'Arial'},
							              sign_line_pairs=None, sub_cat=None, sub_cat_opts={'y_neg_dist': 3.5, 'fontsize': 9, 'fontname':'Arial'},
							              sub_cat_label_dist=None, symb_dist=None, group_let=None, df_format=None, samp_col_name=None,
							              col_order=False, dotplot=False, dotsize=6, colordot=['#101820ff'], valphadot=1, markerdot='o',
							              sign_line_pairs_dist=None, sign_line_pv_symb_dist=None, div_fact=20, add_text=None,
							              figname='singlebar', connectionstyle='bar, armA=50, armB=50, angle=180, fraction=0',
							              std_errs_vis='both', yerrzorder=8, theme=None):
							    """Draw a bar chart with one bar per sample (bar height = mean, error bar = SEM).

							    Data layout
							    -----------
							    df : pandas DataFrame. By default every column is one sample. With
							        ``df_format='stack'`` the rows are grouped by ``samp_col_name`` and the first
							        value column of the grouped means/SEMs is plotted.
							    df_format, samp_col_name : ``'stack'`` selects the stacked layout; the sample
							        column name is then mandatory.
							    col_order : stacked layout only; when true the bars follow the order in which
							        the samples first appear in ``df`` instead of the groupby order.

							    Bars, error bars and dots
							    -------------------------
							    dim : figure size handed to ``plt.subplots``.
							    bw, colorbar, valphabar : bar width, colour(s) and alpha.
							    errorbar, hbsize, yerrlw, yerrcw, yerrzorder : toggle for the error bars, cap size,
							        error line width (``elinewidth``), cap thickness (``capthick``) and z-order.
							    std_errs_vis : ``'both'``, ``'upper'`` or ``'lower'``; which half of the error
							        bar is drawn.
							    dotplot, dotsize, colordot, valphadot, markerdot : overlay the individual
							        observations as dots spread across each bar's width. A single-element
							        ``colordot`` is repeated for every sample.

							    Axes
							    ----
							    ar : (x tick rotation, y tick rotation).
							    ylm : optional ``(start, end, interval)`` for the y limits and ticks.
							    axtickfontsize, axtickfontname : tick label font settings.
							    ax_x_ticklabel : custom x tick labels; the sample names are used otherwise.
							    axxlabel, axylabel, axlabelfontsize, axlabelfontname : forwarded to
							        ``general.axis_labels``.

							    Significance annotations
							    ------------------------
							    pv : sequence of p values. It is indexed per adjacent pair for
							        ``add_sign_line``, per drawn pair for ``sign_line_pairs`` and per bar for
							        ``add_sign_symbol``.
							    add_sign_line : connect adjacent bars (0-1, 2-3, ...) with a line and a label.
							        A trailing unpaired bar is ignored.
							    sign_line_pairs : iterable of ``(i, j)`` bar index pairs to connect.
							    sign_line_opts : dict read for ``'symbol'``, ``'fontsize'``, ``'linewidth'`` and
							        ``'arrowstyle'``; an optional ``'dist_y_pos'`` sets the label offset used by
							        ``add_sign_line`` (defaults to the internal spacing unit).
							    sign_line_pvals : label ``sign_line_pairs`` lines with the p value text instead
							        of the symbol returned by ``general.pvalue_symbol``.
							    sign_line_pairs_dist : optional extra vertical offsets, one per pair.
							    sign_line_pv_symb_dist : optional extra label offsets, one per pair.
							    connectionstyle : matplotlib connection style of the significance lines.
							    add_sign_symbol, sign_symbol_opts, symb_dist, group_let : annotate each bar with a
							        symbol derived from ``pv`` and/or the letter ``group_let[i]``; ``symb_dist``
							        holds optional per-bar vertical offsets.
							    div_fact : the spacing unit for all annotations is ``max(bar height) / div_fact``.

							    Extras
							    ------
							    sub_cat : dict mapping ``(first_bar_x, last_bar_x)`` tuples to a label drawn
							        under a bracket below the axis.
							    sub_cat_opts, sub_cat_label_dist : bracket distance / font options and optional
							        per-category label offsets.
							    add_text : optional ``[x, y, text]`` list passed to ``plt.text``.
							    theme : ``'dark'`` calls ``general.dark_bg()`` before plotting.
							    show, r, figtype, figname : forwarded to ``general.get_figure`` together with
							        ``theme`` (presumably display/save settings -- confirm in ``general``).

							    The dict/list defaults in the signature are only read, never modified.

							    Returns
							    -------
							    None. The figure is handed to ``general.get_figure``.

							    Raises
							    ------
							    ValueError : stacked layout without ``samp_col_name``; unknown ``std_errs_vis``;
							        ``add_sign_line`` or ``sign_line_pairs`` requested without ``pv``.
							    KeyError : a ``sub_cat`` key that is not a tuple of size 2.
							    """
							    plt.rcParams['mathtext.fontset'] = 'custom'
							    plt.rcParams['mathtext.default'] = 'regular'
							    plt.rcParams['mathtext.it'] = 'Arial:italic'
							    plt.rcParams['mathtext.bf'] = 'Arial:italic:bold'

							    # axis labels stay None unless the caller supplies them
							    _x = None
							    _y = None
							    stacked = df_format == 'stack'
							    # name of the plotted value column; only needed for the stacked layout
							    value_col = None
							    if stacked:
							        if samp_col_name is None:
							            raise ValueError('sample column name required')
							        df_mean = df.groupby(samp_col_name).mean().reset_index().set_index(samp_col_name).T
							        df_sem = df.groupby(samp_col_name).sem().reset_index().set_index(samp_col_name).T
							        if col_order:
							            # keep the samples in order of first appearance rather than groupby order
							            df_mean = df_mean[df[samp_col_name].unique()]
							            df_sem = df_sem[df[samp_col_name].unique()]
							        bar_h = df_mean.iloc[0]
							        bar_se = df_sem.iloc[0]
							        sample_list = df_mean.columns.to_numpy()
							        value_col = df_mean.index[0]
							        lowest = bar_h.min()
							    else:
							        bar_h = df.describe().loc['mean']
							        bar_se = df.sem()
							        sample_list = df.columns.to_numpy()
							        lowest = min(df.min())
							    # baseline for the sub category brackets: lowest value if negative, else 0
							    min_value = lowest if lowest < 0 else 0

							    if std_errs_vis == 'upper':
							        std_errs_vis = [len(bar_se)*[0], bar_se]
							    elif std_errs_vis == 'lower':
							        std_errs_vis = [bar_se, len(bar_se)*[0]]
							    elif std_errs_vis == 'both':
							        std_errs_vis = bar_se
							    else:
							        raise ValueError('In valid value for the std_errs_vis')

							    xbar = np.arange(len(sample_list))
							    if theme == 'dark':
							        general.dark_bg()
							    plt.subplots(figsize=dim)
							    if errorbar:
							        plt.bar(x=xbar, height=bar_h, yerr=std_errs_vis, width=bw, color=colorbar,
							                capsize=hbsize, alpha=valphabar, zorder=5,
							                error_kw={'elinewidth': yerrlw, 'capthick': yerrcw, 'zorder': yerrzorder})
							    else:
							        plt.bar(x=xbar, height=bar_h, width=bw, color=colorbar, capsize=hbsize, alpha=valphabar)

							    x_ticklabel = ax_x_ticklabel if ax_x_ticklabel else sample_list
							    plt.xticks(ticks=xbar, labels=x_ticklabel, fontsize=axtickfontsize, rotation=ar[0], fontname=axtickfontname)
							    if axxlabel:
							        _x = axxlabel
							    if axylabel:
							        _y = axylabel
							    general.axis_labels(_x, _y, axlabelfontsize, axlabelfontname)
							    # ylm must be tuple of start, end, interval
							    if ylm:
							        plt.ylim(bottom=ylm[0], top=ylm[1])
							        plt.yticks(np.arange(ylm[0], ylm[1], ylm[2]), fontsize=axtickfontsize, fontname=axtickfontname)
							    plt.yticks(fontsize=axtickfontsize, rotation=ar[1], fontname=axtickfontname)

							    color_list_dot = colordot
							    if len(color_list_dot) == 1:
							        color_list_dot = colordot * len(sample_list)
							    if dotplot:
							        for cols, sample in enumerate(sample_list):
							            if stacked:
							                # stacked layout: pick this sample's rows of the plotted value column
							                values = df.loc[df[samp_col_name] == sample, value_col].dropna()
							            else:
							                values = df[df.columns[cols]].dropna()
							            # one dot per non-missing observation, spread evenly over the bar width
							            plt.scatter(
							                x=np.linspace(xbar[cols] - bw / 2, xbar[cols] + bw / 2, len(values)),
							                y=values, s=dotsize, color=color_list_dot[cols], zorder=10, alpha=valphadot,
							                marker=markerdot)

							    # vertical spacing unit shared by all annotations
							    size_factor_to_start_line = max(bar_h) / div_fact
							    # bar heights and the top of each error bar, computed once for every annotation mode
							    heights = bar_h.to_numpy()
							    tops = heights + bar_se.to_numpy()

							    # for only adjacent bars (not for multiple bars with single control)
							    if add_sign_line:
							        if pv is None:
							            raise ValueError('pv (p values) required for add_sign_line')
							        # label offset above the line; the spacing unit is used when no 'dist_y_pos' is given
							        dist_y_pos = sign_line_opts.get('dist_y_pos', size_factor_to_start_line)
							        # walk over complete pairs only, so an unpaired last bar is skipped
							        for i in range(0, len(xbar) - 1, 2):
							            x_pos = xbar[i]
							            x_pos_2 = xbar[i + 1]
							            y_pos = tops[i]
							            y_pos_2 = tops[i + 1]
							            # only if y axis is positive
							            if y_pos > 0:
							                y_pos += size_factor_to_start_line
							                y_pos_2 += size_factor_to_start_line
							                pv_symb = general.pvalue_symbol(pv[i // 2], sign_line_opts['symbol'])
							                if pv_symb:
							                    line_y = max(y_pos, y_pos_2)
							                    plt.annotate('', xy=(x_pos, line_y), xytext=(x_pos_2, line_y),
							                                 arrowprops={'connectionstyle': connectionstyle,
							                                             'arrowstyle': sign_line_opts['arrowstyle'],
							                                             'linewidth': sign_line_opts['linewidth']})
							                    plt.annotate(pv_symb, xy=(np.mean([x_pos, x_pos_2]), line_y + dist_y_pos),
							                                 fontsize=sign_line_opts['fontsize'], ha="center")

							    # for only adjacent bars with one control but multiple treatments
							    # NOTE(review): p_index only advances for pairs whose first bar is positive, so pv and the
							    # *_dist lists are consumed per drawn pair, not per requested pair -- confirm this is intended
							    p_index = 0
							    y_pos_dict = dict()
							    y_pos_dict_trt = dict()
							    if sign_line_pairs:
							        if pv is None:
							            raise ValueError('pv (p values) required for sign_line_pairs')
							        for i in sign_line_pairs:
							            y_pos_adj = 0
							            x_pos = xbar[i[0]]
							            x_pos_2 = xbar[i[1]]
							            y_pos = tops[i[0]]
							            y_pos_2 = tops[i[1]]
							            # only if y axis is positive
							            if y_pos > 0:
							                # optional caller-supplied extra lift for this pair
							                pair_dist = sign_line_pairs_dist[p_index] if sign_line_pairs_dist else 0
							                y_pos += size_factor_to_start_line/2
							                y_pos_2 += size_factor_to_start_line/2
							                # check if the mean of y_pos is not lesser than not other treatments which lies between
							                # eg if 0-1 has higher sign bar than the 0-2
							                if i[0] in y_pos_dict_trt:
							                    y_pos_adj = 1
							                    prev_y_pos, prev_y_pos_2 = y_pos_dict_trt[i[0]]
							                    if y_pos_2 <= prev_y_pos_2:
							                        y_pos_2 += (prev_y_pos_2 - y_pos_2) + (3 * size_factor_to_start_line) + pair_dist
							                    elif y_pos <= prev_y_pos:
							                        y_pos += 3 * size_factor_to_start_line + pair_dist
							                # check if difference is not equivalent between two y_pos
							                # if yes add some distance, so that sign bar will not overlap
							                if i[0] in y_pos_dict:
							                    y_pos_adj = 1
							                    if 0.75 < heights[i[0]] / heights[i[1]] < 1.25:
							                        y_pos += 2 * size_factor_to_start_line + pair_dist

							                # first line starting at this bar: lift only the higher end
							                if y_pos_adj == 0:
							                    if y_pos >= y_pos_2:
							                        y_pos += pair_dist
							                    else:
							                        y_pos_2 += pair_dist

							                # sign_line_pvals passed, used p values instead of symbols
							                if sign_line_pvals:
							                    pv_symb = r'$\it{p}$' + str(pv[p_index])
							                else:
							                    pv_symb = general.pvalue_symbol(pv[p_index], sign_line_opts['symbol'])
							                y_pos_dict[i[0]] = y_pos
							                y_pos_dict_trt[i[0]] = [y_pos, y_pos_2]
							                if pv_symb:
							                    line_y = max(y_pos, y_pos_2)
							                    # no extra label offset unless the caller supplied per-pair values
							                    symb_extra = sign_line_pv_symb_dist[p_index] if sign_line_pv_symb_dist else 0
							                    plt.annotate('', xy=(x_pos, line_y), xytext=(x_pos_2, line_y),
							                                 arrowprops={'connectionstyle': connectionstyle,
							                                             'arrowstyle': sign_line_opts['arrowstyle'],
							                                             'linewidth': sign_line_opts['linewidth']})
							                    plt.annotate(pv_symb, xy=(np.mean([x_pos, x_pos_2]),
							                                              line_y + size_factor_to_start_line + symb_extra),
							                                 fontsize=sign_line_opts['fontsize'], ha="center")
							                p_index += 1

							    if add_sign_symbol:
							        for i in xbar:
							            x_pos = xbar[i]
							            if symb_dist:
							                y_pos = tops[i] + size_factor_to_start_line + symb_dist[i]
							            else:
							                y_pos = tops[i] + size_factor_to_start_line

							            # group_let list
							            if isinstance(group_let, list):
							                if y_pos > 0:
							                    plt.annotate(group_let[i], xy=(x_pos, y_pos),
							                                 fontsize=sign_symbol_opts['fontsize'], ha="center",
							                                 rotation=sign_symbol_opts['rotation'], fontfamily=sign_symbol_opts['fontname'])

							            # only if y axis is positive
							            if pv:
							                if y_pos > 0:
							                    pv_symb = general.pvalue_symbol(pv[i], sign_symbol_opts['symbol'])
							                    if pv_symb:
							                        plt.annotate(pv_symb, xy=(x_pos, y_pos), fontsize=sign_symbol_opts['fontsize'], ha="center",
							                                     rotation=sign_symbol_opts['rotation'], fontfamily=sign_symbol_opts['fontname'])

							    sub_cat_i = 0
							    if sub_cat:
							        if isinstance(sub_cat, dict):
							            for k in sub_cat:
							                if isinstance(k, tuple) and len(k) == 2:
							                    cat_x_pos, cat_x_pos_2 = k[0], k[1]
							                    # bracket sits below the baseline, clear of the tick labels
							                    cat_y_pos = min_value - (sub_cat_opts['y_neg_dist']*size_factor_to_start_line)
							                    plt.annotate('', xy=(cat_x_pos-(bw/2), cat_y_pos), xytext=(cat_x_pos_2+(bw/2), cat_y_pos),
							                                 arrowprops={'arrowstyle': '-', 'linewidth': 0.5}, annotation_clip=False)
							                    if sub_cat_label_dist and isinstance(sub_cat_label_dist, list):
							                        plt.annotate(sub_cat[k], xy=(np.mean([cat_x_pos, cat_x_pos_2]),
							                                                     cat_y_pos - size_factor_to_start_line -
							                                                     sub_cat_label_dist[sub_cat_i]),
							                                     ha="center", fontsize=sub_cat_opts['fontsize'], annotation_clip=False,
							                                     fontfamily=sub_cat_opts['fontname'])
							                        sub_cat_i += 1
							                    else:
							                        plt.annotate(sub_cat[k], xy=(np.mean([cat_x_pos, cat_x_pos_2]),
							                                                     cat_y_pos-size_factor_to_start_line),
							                                     ha="center", fontsize=sub_cat_opts['fontsize'], annotation_clip=False,
							                                     fontfamily=sub_cat_opts['fontname'])
							                else:
							                    raise KeyError("Sub category keys must be tuple of size 2")

							    if isinstance(add_text, list):
							        plt.text(add_text[0], add_text[1], add_text[2], fontsize=9, fontfamily='Arial')

							    general.get_figure(show, r, figtype, figname, theme)