# libritts.py (1.7 KB)
  1. import os
  2. import librosa
  3. import numpy as np
  4. from scipy.io import wavfile
  5. from tqdm import tqdm
  6. from text import _clean_text
  7. def prepare_align(config):
  8. in_dir = config["path"]["corpus_path"]
  9. out_dir = config["path"]["raw_path"]
  10. sampling_rate = config["preprocessing"]["audio"]["sampling_rate"]
  11. max_wav_value = config["preprocessing"]["audio"]["max_wav_value"]
  12. cleaners = config["preprocessing"]["text"]["text_cleaners"]
  13. for speaker in tqdm(os.listdir(in_dir)):
  14. for chapter in os.listdir(os.path.join(in_dir, speaker)):
  15. for file_name in os.listdir(os.path.join(in_dir, speaker, chapter)):
  16. if file_name[-4:] != ".wav":
  17. continue
  18. base_name = file_name[:-4]
  19. text_path = os.path.join(
  20. in_dir, speaker, chapter, "{}.normalized.txt".format(base_name)
  21. )
  22. wav_path = os.path.join(
  23. in_dir, speaker, chapter, "{}.wav".format(base_name)
  24. )
  25. with open(text_path) as f:
  26. text = f.readline().strip("\n")
  27. text = _clean_text(text, cleaners)
  28. os.makedirs(os.path.join(out_dir, speaker), exist_ok=True)
  29. wav, _ = librosa.load(wav_path, sampling_rate)
  30. wav = wav / max(abs(wav)) * max_wav_value
  31. wavfile.write(
  32. os.path.join(out_dir, speaker, "{}.wav".format(base_name)),
  33. sampling_rate,
  34. wav.astype(np.int16),
  35. )
  36. with open(
  37. os.path.join(out_dir, speaker, "{}.lab".format(base_name)),
  38. "w",
  39. ) as f1:
  40. f1.write(text)