1234567891011121314151617181920212223242526272829303132 |
def calc_barchart_data(picked_idx, selected_idx):
    """
    Build the JSON payload for the top-words bar chart.

    The top words are chosen by the summed tf-idf values of
    ``selected_idx``.  For those same words the summed values of
    ``picked_idx`` and of the ``[-1]`` index list (presumably a sentinel
    meaning "all documents" -- confirm against ``get_summed_tfidf``) are
    looked up too.

    ``get_summed_tfidf`` is assumed to return a 1-D ``numpy.ndarray`` with
    one summed tf-idf value per vocabulary word, aligned with
    ``tfidf_feature_names`` -- TODO confirm.

    Returns:
        A JSON string shaped like
        ``[[{"name": word, "value": [picked, selected, all]}, ...]]``,
        ordered by ascending ``selected`` value.  At most 30 words are
        returned; fewer when the vocabulary is smaller than that.
    """
    len_output = 30
    word_data_picked = get_summed_tfidf(picked_idx, len_output)
    word_data_selected = get_summed_tfidf(selected_idx, len_output)
    word_data_all = get_summed_tfidf([-1], len_output)

    # np.argpartition raises ValueError when kth is out of bounds, so never
    # ask for more words than the vocabulary actually holds.
    n_top = min(len_output, word_data_selected.shape[-1])
    if n_top == 0:
        return json.dumps([[]])

    # argpartition finds the n_top largest values without a full sort;
    # argsort then orders just that subset (ascending).
    top_word_idxs = np.argpartition(word_data_selected, -n_top)[-n_top:]
    top_word_idxs = top_word_idxs[np.argsort(word_data_selected[top_word_idxs])]

    top_words = [tfidf_feature_names[i] for i in top_word_idxs]

    # .tolist() yields native Python numbers, so json.dumps works for any
    # numeric dtype (NumPy scalars other than float64 are not serialisable).
    vals_picked = word_data_picked[top_word_idxs].tolist()
    vals_selected = word_data_selected[top_word_idxs].tolist()
    vals_all = word_data_all[top_word_idxs].tolist()

    tfidf_dict = [
        {"name": word, "value": [picked, selected, overall]}
        for word, picked, selected, overall in zip(
            top_words, vals_picked, vals_selected, vals_all
        )
    ]
    # The extra list wrapper is kept: consumers receive a one-element outer list.
    return json.dumps([tfidf_dict])
|