123456789101112131415161718192021 |
def calc_feature_distribution(feature_idx_list):
    """Split rows of ``tfidf_vectors`` by how many selected features they hit.

    The indices in ``feature_idx_list`` are coerced to ``int`` and used to
    pick columns out of the module-level ``tfidf_vectors`` matrix. Every row
    with at least one nonzero entry among those columns is reported under its
    row index plus one, rendered as a string:

    * ``"single"`` -- rows with exactly one nonzero entry in the selection.
    * ``"both"``   -- rows with more than one nonzero entry in the selection.

    Returns a JSON string encoding a one-element list that holds the
    ``{"single": [...], "both": [...]}`` mapping.

    NOTE(review): the "both" label suggests callers pass exactly two distinct
    feature indices; with more indices (or a repeated index) it means
    "two or more hits" -- confirm against the caller.
    """
    columns = [int(i) for i in feature_idx_list]

    # Row coordinate of each nonzero cell in the selected columns; a row
    # shows up once per selected feature that is nonzero for it.
    hit_rows, _ = tfidf_vectors[:, columns].nonzero()
    rows, hits_per_row = np.unique(hit_rows, return_counts=True)

    # Reported ids are the row index shifted by one
    # (presumably 1-based comic serial numbers -- TODO confirm).
    serials = rows + 1

    # Insertion order (single, then both) fixes the key order in the JSON.
    buckets = {"single": [], "both": []}
    for serial, hits in zip(serials, hits_per_row):
        label = "both" if hits > 1 else "single"
        buckets[label].append(str(serial))

    return json.dumps([buckets])
|