---
# model.yaml
#
# Model hyper-parameter configuration: transformer encoder/decoder settings,
# variance predictor/embedding settings, speaker and sequence-length options,
# and vocoder selection.
#
# NOTE(review): this file was recovered from a flattened listing in which the
# original indentation was lost. Keys that follow a value-less section key
# (`transformer`, `variance_predictor`, `variance_embedding`, `vocoder`) are
# nested under it. `multi_speaker` and `max_seq_len` are placed at top level
# because the commented-out `gst` section separates them from
# `variance_embedding` -- confirm against the code that consumes this file.

transformer:
  encoder_layer: 4
  encoder_head: 2
  encoder_hidden: 256
  decoder_layer: 6
  decoder_head: 2
  decoder_hidden: 256
  conv_filter_size: 1024
  conv_kernel_size: [9, 1]
  encoder_dropout: 0.2
  decoder_dropout: 0.2

variance_predictor:
  filter_size: 256
  kernel_size: 3
  dropout: 0.5

variance_embedding:
  # Supports 'linear' or 'log'; 'log' is allowed only if the pitch values
  # are not normalized during preprocessing.
  pitch_quantization: "linear"
  # Supports 'linear' or 'log'; 'log' is allowed only if the energy values
  # are not normalized during preprocessing.
  energy_quantization: "linear"
  n_bins: 256

# Disabled section, kept for reference. Uncomment the whole block to enable.
# gst:
#   use_gst: false
#   conv_filters: [32, 32, 64, 64, 128, 128]
#   gru_hidden: 128
#   token_size: 128
#   n_style_token: 10
#   attn_head: 4

multi_speaker: true

max_seq_len: 1000

vocoder:
  model: "HiFi-GAN"  # supports 'HiFi-GAN', 'MelGAN'
  speaker: "universal"  # supports 'LJSpeech', 'universal'