audio_processing.py 2.6 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100
  1. import torch
  2. import numpy as np
  3. import librosa.util as librosa_util
  4. from scipy.signal import get_window
  5. def window_sumsquare(
  6. window,
  7. n_frames,
  8. hop_length,
  9. win_length,
  10. n_fft,
  11. dtype=np.float32,
  12. norm=None,
  13. ):
  14. """
  15. # from librosa 0.6
  16. Compute the sum-square envelope of a window function at a given hop length.
  17. This is used to estimate modulation effects induced by windowing
  18. observations in short-time fourier transforms.
  19. Parameters
  20. ----------
  21. window : string, tuple, number, callable, or list-like
  22. Window specification, as in `get_window`
  23. n_frames : int > 0
  24. The number of analysis frames
  25. hop_length : int > 0
  26. The number of samples to advance between frames
  27. win_length : [optional]
  28. The length of the window function. By default, this matches `n_fft`.
  29. n_fft : int > 0
  30. The length of each analysis frame.
  31. dtype : np.dtype
  32. The data type of the output
  33. Returns
  34. -------
  35. wss : np.ndarray, shape=`(n_fft + hop_length * (n_frames - 1))`
  36. The sum-squared envelope of the window function
  37. """
  38. if win_length is None:
  39. win_length = n_fft
  40. n = n_fft + hop_length * (n_frames - 1)
  41. x = np.zeros(n, dtype=dtype)
  42. # Compute the squared window at the desired length
  43. win_sq = get_window(window, win_length, fftbins=True)
  44. win_sq = librosa_util.normalize(win_sq, norm=norm) ** 2
  45. win_sq = librosa_util.pad_center(win_sq, n_fft)
  46. # Fill the envelope
  47. for i in range(n_frames):
  48. sample = i * hop_length
  49. x[sample : min(n, sample + n_fft)] += win_sq[: max(0, min(n_fft, n - sample))]
  50. return x
  51. def griffin_lim(magnitudes, stft_fn, n_iters=30):
  52. """
  53. PARAMS
  54. ------
  55. magnitudes: spectrogram magnitudes
  56. stft_fn: STFT class with transform (STFT) and inverse (ISTFT) methods
  57. """
  58. angles = np.angle(np.exp(2j * np.pi * np.random.rand(*magnitudes.size())))
  59. angles = angles.astype(np.float32)
  60. angles = torch.autograd.Variable(torch.from_numpy(angles))
  61. signal = stft_fn.inverse(magnitudes, angles).squeeze(1)
  62. for i in range(n_iters):
  63. _, angles = stft_fn.transform(signal)
  64. signal = stft_fn.inverse(magnitudes, angles).squeeze(1)
  65. return signal
  66. def dynamic_range_compression(x, C=1, clip_val=1e-5):
  67. """
  68. PARAMS
  69. ------
  70. C: compression factor
  71. """
  72. return torch.log(torch.clamp(x, min=clip_val) * C)
  73. def dynamic_range_decompression(x, C=1):
  74. """
  75. PARAMS
  76. ------
  77. C: compression factor used to compress
  78. """
  79. return torch.exp(x) / C