# utils_9.py (1.5 KB)
  1. def extract_spec(dataset='train'):
  2. f = open(data_path + dataset + '_list.txt', 'r')
  3. i = 0
  4. for file_name in f:
  5. i = i + 1
  6. if not (i % 10):
  7. print(i)
  8. # load audio file
  9. file_name = file_name.rstrip('\n')
  10. file_path = data_path + file_name
  11. # print file_path
  12. y0, sr = librosa.load(file_path, sr=22050)
  13. # we use first 1 second
  14. half = len(y0) / 4
  15. y = y0[:round(half)]
  16. # mfcc
  17. mfcc = librosa.feature.mfcc(y=y, sr=sr, n_mfcc=MFCC_DIM)
  18. # delta mfcc and double delta
  19. delta_mfcc = librosa.feature.delta(mfcc)
  20. ddelta_mfcc = librosa.feature.delta(mfcc, order=2)
  21. # STFT
  22. D = np.abs(librosa.core.stft(y, hop_length=512, n_fft=1024, win_length=1024))
  23. D_dB = librosa.amplitude_to_db(D, ref=np.max)
  24. # mel spectrogram
  25. mel_S = librosa.feature.melspectrogram(S=D, sr=sr, n_mels=128)
  26. S_dB = librosa.power_to_db(mel_S, ref=np.max) # log compression
  27. # spectral centroid
  28. spec_centroid = librosa.feature.spectral_centroid(S=D)
  29. # concatenate all features
  30. features = np.concatenate([mfcc, delta_mfcc, ddelta_mfcc, spec_centroid], axis=0)
  31. # save mfcc as a file
  32. file_name = file_name.replace('.wav', '.npy')
  33. save_file = spec_path + file_name
  34. if not os.path.exists(os.path.dirname(save_file)):
  35. os.makedirs(os.path.dirname(save_file))
  36. np.save(save_file, features)
  37. f.close()