# application_5.py (1.4 KB)

  1. def calc_barchart_data(picked_idx, selected_idx):
  2. """
  3. Get top words and vales based off of a_idx
  4. Then for those words get values for b_idx
  5. word_data type: 'scipy.sparse.csr.csr_matrix'
  6. sum word data so total tfidf value for word
  7. word_data type: 'numpy.matrix'
  8. transform 'numpy.matrix' to 'numpy.ndarray'
  9. """
  10. len_output = 30
  11. word_data_picked = get_summed_tfidf(picked_idx, len_output)
  12. word_data_selected = get_summed_tfidf(selected_idx, len_output)
  13. word_data_all = get_summed_tfidf([-1], len_output)
  14. top_word_idxs_selected = np.argpartition(word_data_selected, -len_output)[-len_output:]
  15. top_word_idxs_selected = top_word_idxs_selected[np.argsort(word_data_selected[top_word_idxs_selected])]
  16. top_word_vals_selected = word_data_selected[top_word_idxs_selected]
  17. top_word_vals_picked = word_data_picked[top_word_idxs_selected]
  18. top_word_vals_all = word_data_all[top_word_idxs_selected]
  19. top_words_selected = [tfidf_feature_names[i] for i in top_word_idxs_selected]
  20. # labels = ["word", "tfidf"]
  21. labels = ["name", "value"]
  22. top_word_vals = zip(top_word_vals_picked, top_word_vals_selected, top_word_vals_all)
  23. tfidf_zipped = zip(top_words_selected, top_word_vals)
  24. tfidf_dict = [dict(zip(labels, row)) for row in tfidf_zipped]
  25. barchart_data = json.dumps([tfidf_dict])
  26. return barchart_data