# symbols.py
  1. """ from https://github.com/keithito/tacotron """
  2. """
  3. Defines the set of symbols used in text input to the model.
  4. The default is a set of ASCII characters that works well for English or text that has been run through Unidecode. For other data, you can modify _characters. See TRAINING_DATA.md for details. """
  5. from text import cmudict, pinyin
  6. _pad = "_"
  7. _punctuation = "!'(),.:;? "
  8. _special = "-"
  9. _letters = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz"
  10. _silences = ["@sp", "@spn", "@sil"]
  11. # Prepend "@" to ARPAbet symbols to ensure uniqueness (some are the same as uppercase letters):
  12. _arpabet = ["@" + s for s in cmudict.valid_symbols]
  13. _pinyin = ["@" + s for s in pinyin.valid_symbols]
  14. # Export all symbols:
  15. symbols = (
  16. [_pad]
  17. + list(_special)
  18. + list(_punctuation)
  19. + list(_letters)
  20. + _arpabet
  21. + _pinyin
  22. + _silences
  23. )