def extract_spec(dataset='train'):
    """Extract per-file audio features and save each one as a ``.npy`` file.

    Reads ``data_path + dataset + '_list.txt'``, which lists one audio file
    path (relative to ``data_path``) per line. For every listed file the
    first quarter of the samples is analysed and the following features are
    stacked along axis 0:

    * MFCCs (``MFCC_DIM`` coefficients),
    * first-order deltas of the MFCCs,
    * second-order deltas of the MFCCs,
    * the spectral centroid of the magnitude STFT.

    The stacked array is written under ``spec_path`` at the same relative
    path, with ``.wav`` replaced by ``.npy``. Missing output directories are
    created on demand.

    Args:
        dataset: Prefix of the list file to process (e.g. ``'train'``).

    Returns:
        None. The results are written to disk.
    """
    list_path = data_path + dataset + '_list.txt'

    # The context manager closes the list file even if loading or feature
    # extraction raises part-way through the dataset.
    with open(list_path, 'r') as list_file:
        for line_no, line in enumerate(list_file, start=1):
            # Lightweight progress indicator: one message every 10 lines.
            if line_no % 10 == 0:
                print(line_no)

            # Strip the line terminator (LF or CRLF) and skip blank lines so
            # that we never try to load the bare data directory.
            file_name = line.rstrip('\r\n')
            if not file_name:
                continue

            # Load the audio, resampled to 22050 Hz.
            y0, sr = librosa.load(data_path + file_name, sr=22050)

            # Keep only the first quarter of the samples. (This equals one
            # second only when the clip is about four seconds long.)
            y = y0[:round(len(y0) / 4)]

            # MFCCs plus their first- and second-order deltas.
            mfcc = librosa.feature.mfcc(y=y, sr=sr, n_mfcc=MFCC_DIM)
            delta_mfcc = librosa.feature.delta(mfcc)
            ddelta_mfcc = librosa.feature.delta(mfcc, order=2)

            # Magnitude STFT; only needed as input for the spectral centroid.
            magnitude = np.abs(
                librosa.core.stft(y, hop_length=512, n_fft=1024,
                                  win_length=1024)
            )
            spec_centroid = librosa.feature.spectral_centroid(S=magnitude)

            # Stack every feature row-wise into a single matrix.
            features = np.concatenate(
                [mfcc, delta_mfcc, ddelta_mfcc, spec_centroid], axis=0
            )

            # Mirror the input's relative path under spec_path.
            save_file = spec_path + file_name.replace('.wav', '.npy')
            save_dir = os.path.dirname(save_file)
            if save_dir:
                # exist_ok avoids the check-then-create race; the guard
                # avoids calling makedirs('') for a bare file name.
                os.makedirs(save_dir, exist_ok=True)
            np.save(save_file, features)