12345678910111213141516171819202122232425262728293031323334353637383940414243444546 |
def extract_codebook(dataset='train', n_clusters=20):
    """Compute and save a K-means codebook histogram for each listed audio file.

    Reads file names (one per line) from ``data_path + dataset + '_list.txt'``.
    For every file, the first quarter of the samples is transformed with an
    STFT, the magnitude frames are clustered with K-means, and the number of
    frames assigned to each cluster is stored as a length-``n_clusters``
    float vector under ``codebook_path`` (``.wav`` replaced by ``.npy``).

    ``data_path`` and ``codebook_path`` are taken from the enclosing module;
    they are concatenated directly, so they are presumably expected to end
    with a path separator -- confirm against their definitions.

    Args:
        dataset: Prefix of the list file to process (e.g. ``'train'``).
        n_clusters: Number of K-means clusters, i.e. the histogram length.

    Raises:
        ValueError: Propagated from ``KMeans.fit`` when a clip yields fewer
            STFT frames than ``n_clusters``.
    """
    list_path = data_path + dataset + '_list.txt'
    # The context manager closes the list file even if a clip fails to load.
    with open(list_path, 'r') as f:
        for i, line in enumerate(f, start=1):
            # Progress indicator: one print per 10 lines of the list file.
            if not (i % 10):
                print(i)

            file_name = line.rstrip('\n')
            if not file_name:
                # A blank line would otherwise try to load the data directory.
                continue

            # Load audio, resampled to 22.05 kHz.
            y0, _ = librosa.load(data_path + file_name, sr=22050)

            # Only the first quarter of the clip is analysed.
            y = y0[:round(len(y0) / 4)]

            # STFT magnitude; the phase component is not needed.
            S_full, _ = librosa.magphase(
                librosa.stft(y, n_fft=1024, window='hann',
                             hop_length=256, win_length=1024))

            # KMeans expects one example per row, so put frames on axis 0.
            X = S_full.T

            kmeans = KMeans(n_clusters=n_clusters, random_state=0).fit(X)

            # Frames per cluster. bincount indexes by label value, so an empty
            # cluster correctly gets 0 instead of shifting later counts.
            codebook = np.bincount(
                kmeans.labels_, minlength=n_clusters).astype(np.float64)

            # Save the codebook histogram, mirroring the input's relative path.
            save_file = codebook_path + file_name.replace('.wav', '.npy')
            save_dir = os.path.dirname(save_file)
            if save_dir:
                os.makedirs(save_dir, exist_ok=True)
            np.save(save_file, codebook)
|