12345678910111213141516171819202122232425262728293031323334353637383940414243444546 |
def extract_spec(dataset='train'):
    """Extract per-file audio features and save them as ``.npy`` files.

    Reads ``data_path + dataset + '_list.txt'``, which lists one audio file
    path (relative to ``data_path``) per line. For each listed file the
    function computes MFCCs, their first- and second-order deltas, and the
    spectral centroid, stacks them along the feature axis (axis 0), and
    writes the result under ``spec_path`` using the same relative path with
    ``.wav`` replaced by ``.npy``.

    Relies on the module-level names ``data_path``, ``spec_path`` and
    ``MFCC_DIM``.

    Args:
        dataset: Prefix of the list file to process (e.g. ``'train'``).
    """
    list_path = data_path + dataset + '_list.txt'

    # The context manager closes the list file even if loading or saving a
    # single audio file raises part-way through the loop.
    with open(list_path, 'r') as f:
        for i, line in enumerate(f, start=1):
            # Progress indicator: print every 10th line number.
            if not (i % 10):
                print(i)

            # Strip the line terminator (including '\r' from CRLF files) and
            # skip blank lines, which would otherwise resolve to data_path
            # itself and make the audio loader fail.
            file_name = line.rstrip('\r\n')
            if not file_name:
                continue

            # Load audio, resampled to 22050 Hz.
            file_path = data_path + file_name
            y0, sr = librosa.load(file_path, sr=22050)

            # Keep only the first quarter of the signal.
            # NOTE(review): the original comment said "first 1 second",
            # which only holds if every clip is 4 seconds long - confirm.
            y = y0[:round(len(y0) / 4)]

            # MFCC plus first- and second-order deltas.
            mfcc = librosa.feature.mfcc(y=y, sr=sr, n_mfcc=MFCC_DIM)
            delta_mfcc = librosa.feature.delta(mfcc)
            ddelta_mfcc = librosa.feature.delta(mfcc, order=2)

            # Magnitude STFT, used as the input to the spectral centroid.
            D = np.abs(
                librosa.core.stft(y, hop_length=512, n_fft=1024, win_length=1024)
            )
            spec_centroid = librosa.feature.spectral_centroid(S=D, sr=sr)

            # Stack all features along the feature axis.
            features = np.concatenate(
                [mfcc, delta_mfcc, ddelta_mfcc, spec_centroid], axis=0
            )

            # Save next to the mirrored relative path under spec_path.
            save_file = spec_path + file_name.replace('.wav', '.npy')
            save_dir = os.path.dirname(save_file)
            if save_dir:
                # exist_ok avoids the check-then-create race and is a no-op
                # when the directory is already present.
                os.makedirs(save_dir, exist_ok=True)
            np.save(save_file, features)
|