def calc_barchart_data(picked_idx, selected_idx):
    """Build the JSON payload for the top-words bar chart.

    The top words are chosen from the summed tf-idf scores of
    ``selected_idx``; the scores of those same words are then looked up for
    ``picked_idx`` and for the whole corpus.

    Args:
        picked_idx: Passed unchanged to ``get_summed_tfidf`` to obtain the
            "picked" scores.
        selected_idx: Passed unchanged to ``get_summed_tfidf`` to obtain the
            "selected" scores; these determine which words are reported.

    Returns:
        A JSON string encoding a list that wraps one list of
        ``{"name": word, "value": [picked, selected, all]}`` objects, ordered
        by ascending "selected" score. At most 30 words are reported; if
        there are no scores at all the payload is ``[[]]``.

    NOTE(review): assumes ``get_summed_tfidf`` returns 1-D ``numpy.ndarray``
    objects aligned with ``tfidf_feature_names`` -- confirm against that
    helper.
    """
    len_output = 30

    word_data_picked = get_summed_tfidf(picked_idx, len_output)
    word_data_selected = get_summed_tfidf(selected_idx, len_output)
    # [-1] is presumably the helper's sentinel for "all documents" -- verify
    # against get_summed_tfidf.
    word_data_all = get_summed_tfidf([-1], len_output)

    # np.argpartition raises ValueError when asked for more elements than the
    # array holds, so never request more words than are available.
    top_n = min(len_output, len(word_data_selected))
    if top_n == 0:
        return json.dumps([[]])

    # argpartition finds the top_n largest scores without a full sort; the
    # follow-up argsort orders just those top_n entries (ascending).
    top_word_idxs = np.argpartition(word_data_selected, -top_n)[-top_n:]
    top_word_idxs = top_word_idxs[np.argsort(word_data_selected[top_word_idxs])]

    top_words = [tfidf_feature_names[i] for i in top_word_idxs]

    # .tolist() converts NumPy scalars to native Python numbers so that
    # json.dumps does not depend on the array dtype being JSON-friendly.
    vals_picked = word_data_picked[top_word_idxs].tolist()
    vals_selected = word_data_selected[top_word_idxs].tolist()
    vals_all = word_data_all[top_word_idxs].tolist()

    labels = ["name", "value"]
    value_triples = zip(vals_picked, vals_selected, vals_all)
    tfidf_dict = [
        dict(zip(labels, row)) for row in zip(top_words, value_triples)
    ]

    # The outer list wraps the single series, matching the original payload.
    return json.dumps([tfidf_dict])