# preprocess.yaml
  # Preprocessing configuration for the dataset named below.
  # NOTE(review): the nesting was reconstructed from a flattened copy of this
  # file (each empty-valued key is treated as the parent of the keys that
  # follow it); confirm the hierarchy against the code that loads this config.
  dataset: "LJSpeech"

  path:
    # Machine-specific absolute path; adjust to where the corpus lives locally.
    corpus_path: "/home/ming/Data/LJSpeech-1.1"
    lexicon_path: "lexicon/librispeech-lexicon.txt"
    raw_path: "./raw_data/LJSpeech"
    preprocessed_path: "./preprocessed_data/LJSpeech"

  preprocessing:
    val_size: 512
    text:
      text_cleaners: ["english_cleaners"]
      language: "en"
    audio:
      sampling_rate: 22050
      max_wav_value: 32768.0
    stft:
      filter_length: 1024
      hop_length: 256
      win_length: 1024
    mel:
      n_mel_channels: 80
      mel_fmin: 0
      # Set to 8000 for the HiFi-GAN vocoder; set to null for the MelGAN vocoder.
      mel_fmax: 8000
    pitch:
      feature: "phoneme_level"  # supported values: 'phoneme_level' or 'frame_level'
      normalization: true
    energy:
      feature: "phoneme_level"  # supported values: 'phoneme_level' or 'frame_level'
      normalization: true