application_4.py 765 B

123456789101112131415161718192021
  1. def calc_feature_distribution(feature_idx_list):
  2. # get col of selected features
  3. feature_idx_list = [int(i) for i in feature_idx_list]
  4. selected_features = tfidf_vectors[:, feature_idx_list]
  5. # get comics (rows) where selected features are nonzero
  6. nonzero_comics, _ = selected_features.nonzero()
  7. nonzero_comics, comic_counts = np.unique(nonzero_comics, return_counts=True)
  8. nonzero_comics = nonzero_comics + 1
  9. single = []
  10. both = []
  11. for idx,comic_sn in enumerate(nonzero_comics):
  12. if comic_counts[idx] > 1:
  13. both.append(str(comic_sn))
  14. else:
  15. single.append(str(comic_sn))
  16. feature_dict = dict(single=single, both=both)
  17. feature_data = json.dumps([feature_dict])
  18. return feature_data