def calc_feature_distribution(feature_idx_list):
    """Split comics by how many of the selected features they contain.

    Reads the ``tfidf_vectors`` matrix, which is not defined in this block
    (assumed to be a module-level 2-D matrix with one row per comic and one
    column per feature -- TODO confirm against its definition).

    Args:
        feature_idx_list: Iterable of column indices into ``tfidf_vectors``;
            each item is passed through ``int()`` before indexing.

    Returns:
        A JSON string encoding a one-element list that holds a dict with
        two keys:

        * ``"single"`` -- comic numbers (as strings) whose row is nonzero
          in exactly one of the selected columns.
        * ``"both"`` -- comic numbers (as strings) whose row is nonzero in
          more than one of the selected columns.

        Comics with no nonzero selected feature appear in neither list.
    """
    columns = [int(i) for i in feature_idx_list]
    selected = tfidf_vectors[:, columns]

    # nonzero() yields one row index per nonzero (row, column) entry, so a
    # row index repeats once for every selected feature present in that row.
    hit_rows, _ = selected.nonzero()
    rows, hits_per_row = np.unique(hit_rows, return_counts=True)

    # Row positions are shifted by one to obtain the reported comic number
    # (presumably comic numbers are 1-based -- verify against the caller).
    comic_numbers = rows + 1

    in_several = hits_per_row > 1
    both = [str(sn) for sn in comic_numbers[in_several]]
    single = [str(sn) for sn in comic_numbers[~in_several]]

    return json.dumps([{"single": single, "both": both}])