tools.py

import torch
import numpy as np
from scipy.io.wavfile import write

from audio.audio_processing import griffin_lim


def get_mel_from_wav(audio, _stft):
    # Clamp the waveform to [-1, 1] and add a batch dimension.
    audio = torch.clip(torch.FloatTensor(audio).unsqueeze(0), -1, 1)
    audio = torch.autograd.Variable(audio, requires_grad=False)
    # Compute the mel spectrogram and per-frame energy, then drop the batch dim.
    melspec, energy = _stft.mel_spectrogram(audio)
    melspec = torch.squeeze(melspec, 0).numpy().astype(np.float32)
    energy = torch.squeeze(energy, 0).numpy().astype(np.float32)

    return melspec, energy


def inv_mel_spec(mel, out_filename, _stft, griffin_iters=60):
    # Undo the dynamic-range compression applied during mel extraction.
    mel = torch.stack([mel])
    mel_decompress = _stft.spectral_de_normalize(mel)
    mel_decompress = mel_decompress.transpose(1, 2).data.cpu()
    # Project the mel spectrogram back onto the linear-frequency STFT basis.
    spec_from_mel_scaling = 1000
    spec_from_mel = torch.mm(mel_decompress[0], _stft.mel_basis)
    spec_from_mel = spec_from_mel.transpose(0, 1).unsqueeze(0)
    spec_from_mel = spec_from_mel * spec_from_mel_scaling

    # Recover a waveform from the magnitude spectrogram via Griffin-Lim iterations.
    audio = griffin_lim(
        torch.autograd.Variable(spec_from_mel[:, :, :-1]), _stft._stft_fn, griffin_iters
    )

    audio = audio.squeeze()
    audio = audio.cpu().numpy()
    audio_path = out_filename
    write(audio_path, _stft.sampling_rate, audio)
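
A minimal usage sketch for these two helpers. It assumes a Tacotron-style TacotronSTFT class is available from audio.stft with the usual constructor arguments (filter_length, hop_length, win_length, n_mel_channels, sampling_rate, mel_fmin, mel_fmax); the parameter values and file paths below are placeholders, not values taken from this repo's config.

# usage_sketch.py (illustrative only; TacotronSTFT arguments and paths are assumptions)
import numpy as np
import torch
from scipy.io.wavfile import read

from audio.stft import TacotronSTFT          # assumed location of the STFT helper
from tools import get_mel_from_wav, inv_mel_spec

_stft = TacotronSTFT(
    filter_length=1024,
    hop_length=256,
    win_length=1024,
    n_mel_channels=80,
    sampling_rate=22050,
    mel_fmin=0.0,
    mel_fmax=8000.0,
)

sr, wav = read("example.wav")                 # placeholder input path
wav = wav.astype(np.float32) / 32768.0        # int16 PCM -> float in [-1, 1]

mel, energy = get_mel_from_wav(wav, _stft)    # mel: (n_mels, T), energy: (T,)

# Round-trip the mel spectrogram back to audio with Griffin-Lim.
inv_mel_spec(torch.from_numpy(mel), "reconstructed.wav", _stft, griffin_iters=60)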