def extract_codebook(dataset='train', n_clusters=20):
    """Compute and save a K-Means codebook histogram for each listed audio file.

    Reads ``data_path + dataset + '_list.txt'``, one audio path (relative to
    ``data_path``) per line. For every entry the audio is loaded, the first
    quarter of its samples is kept, an STFT magnitude is computed, its
    transposed rows are clustered with K-Means, and the number of rows
    assigned to each cluster is written with ``np.save`` to
    ``codebook_path + <entry with '.wav' replaced by '.npy'>``.

    Args:
        dataset: Prefix of the list file name (``<dataset>_list.txt``).
        n_clusters: Number of K-Means clusters, and therefore the length of
            each saved histogram. Defaults to 20.

    Returns:
        None. Results are written to disk; a running count is printed every
        10 list entries.

    Note:
        ``data_path``, ``codebook_path``, ``librosa`` and ``KMeans`` are
        expected to be available at module level.
    """
    list_path = data_path + dataset + '_list.txt'

    # `with` guarantees the list file is closed even when loading,
    # clustering or saving raises part-way through the list.
    with open(list_path, 'r') as f:
        for count, line in enumerate(f, start=1):
            if count % 10 == 0:
                print(count)  # coarse progress indicator

            file_name = line.rstrip('\n')
            if not file_name:
                # Blank line (e.g. a trailing newline at the end of the
                # list): there is no file to load.
                continue

            # Load the audio file at a 22050 Hz sampling rate.
            y0, _ = librosa.load(data_path + file_name, sr=22050)

            # Keep only the first quarter of the samples.
            y = y0[:round(len(y0) / 4)]

            # STFT magnitude; the phase component is not used.
            S_full, _ = librosa.magphase(
                librosa.stft(y, n_fft=1024, window='hann',
                             hop_length=256, win_length=1024))

            # Transpose so that each row is one example for the clustering
            # (rows are presumably STFT frames).
            X = S_full.T

            kmeans = KMeans(n_clusters=n_clusters, random_state=0).fit(X)

            # Codebook histogram: rows assigned to each cluster id.
            # bincount indexes by label value, so a cluster that received no
            # rows stays 0 instead of shifting the other counts. The cast
            # keeps the float64 dtype of the previously saved arrays.
            codebook = np.bincount(
                kmeans.labels_, minlength=n_clusters).astype(np.float64)

            # Save the codebook histogram next to the mirrored file name.
            save_file = codebook_path + file_name.replace('.wav', '.npy')
            save_dir = os.path.dirname(save_file)
            if save_dir:
                # exist_ok avoids the check-then-create race.
                os.makedirs(save_dir, exist_ok=True)
            np.save(save_file, codebook)