LiuFan
/
PrivacyScanData


			
				
					
						
						
							12345678910111213141516171819202122232425262728293031323334353637383940414243444546
							def extract_codebook(dataset='train'):
							    """Build and save a K-means codebook histogram for each audio file in a split.

							    File names are read, one per line, from
							    ``data_path + dataset + '_list.txt'``. For every listed file the
							    function:

							    1. loads the audio with ``librosa.load`` at 22050 Hz,
							    2. keeps only the first quarter of the samples,
							    3. computes the STFT magnitude (n_fft=1024, hop_length=256, Hann window),
							    4. clusters the spectrogram frames into 20 groups with K-means
							       (``random_state=0``),
							    5. counts how many frames were assigned to each cluster, and
							    6. saves that 20-bin histogram with ``np.save`` to
							       ``codebook_path + file_name`` with ``'.wav'`` replaced by ``'.npy'``,
							       creating the target directory when needed.

							    The names ``data_path``, ``codebook_path``, ``librosa``, ``KMeans``,
							    ``np`` and ``os`` are expected to be defined at module level; they are
							    not defined inside this function.

							    Args:
							        dataset: Prefix of the list file to process (default ``'train'``).

							    Returns:
							        None. Results are written to disk; a progress counter is printed
							        every 10 lines of the list file.
							    """
							    n_clusters = 20

							    # `with` guarantees the list file is closed even if loading or
							    # clustering raises part-way through.
							    with open(data_path + dataset + '_list.txt', 'r') as list_file:
							        for i, line in enumerate(list_file, start=1):
							            if not (i % 10):
							                print(i)

							            # Strip the line terminator (including a stray '\r' from CRLF
							            # files) and ignore empty lines instead of trying to load them.
							            file_name = line.rstrip('\r\n')
							            if not file_name:
							                continue

							            # load audio file
							            y0, _ = librosa.load(data_path + file_name, sr=22050)

							            # Only the first quarter of the signal is analysed.
							            y = y0[:round(len(y0) / 4)]

							            # STFT magnitude; the phase component is not used.
							            S_full, _ = librosa.magphase(
							                librosa.stft(y, n_fft=1024, window='hann',
							                             hop_length=256, win_length=1024))

							            # Transpose so that each row is one example (frame) for K-means.
							            X = S_full.T

							            # codebook by using K-Means clustering
							            kmeans = KMeans(n_clusters=n_clusters, random_state=0).fit(X)

							            # Histogram of cluster assignments. bincount indexes by label
							            # value, so a cluster that received no frames stays at 0 rather
							            # than shifting the counts of the following clusters. Cast to
							            # float64 to keep the dtype of the previously saved arrays.
							            codebook = np.bincount(
							                kmeans.labels_, minlength=n_clusters).astype(np.float64)

							            # save the codebook histogram as a file
							            save_file = codebook_path + file_name.replace('.wav', '.npy')
							            save_dir = os.path.dirname(save_file)
							            if save_dir:
							                os.makedirs(save_dir, exist_ok=True)
							            np.save(save_file, codebook)