SherryLiu 7 meses atrás
pai
commit
2daf12682d
3 arquivos alterados com 19 adições e 5 exclusões
  1. 9 0
      READNE.md
  2. 5 0
      environment.env
  3. 5 5
      whisper.py

+ 9 - 0
READNE.md

@@ -0,0 +1,9 @@
+# 語音轉文字 ASR
+
+### Prerequisites
+- 語音檔存放路徑`data/`
+- `environment.env`
+
+### To run the code
+1. `pip install -r requirements.txt`
+2. `python whisper.py`

+ 5 - 0
environment.env

@@ -0,0 +1,5 @@
+# Choozemo
+SUPABASE_URI = "postgresql://postgres:chuz8310xsystex@db.ptt.cx:5432/postgres"
+SUPABASE_URL = "http://db.ptt.cx:8000/"
+SUPABASE_KEY = "eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyAgCiAgICAicm9sZSI6ICJzZXJ2aWNlX3JvbGUiLAogICAgImlzcyI6ICJzdXBhYmFzZS1kZW1vIiwKICAgICJpYXQiOiAxNjQxNzY5MjAwLAogICAgImV4cCI6IDE3OTk1MzU2MDAKfQ.DaYlNEoUrrEn2Ig7tqibS-PHK5vgusbcbo7X36XVt4Q"
+OPENAI_API_KEY = "sk-proj-kGQPjKXup6g5QmjWvN3GT3BlbkFJDOYyhv8auoHBDIznmbgj"

+ 5 - 5
whisper.py

@@ -97,8 +97,8 @@ def chinese_soundex(pinyin):
 
 
 def compare_chinese_words(word1, word2):
-    pinyin1 = ''.join([p[0] for p in pinyin(word1, style=Style.NORMAL)])
-    pinyin2 = ''.join([p[0] for p in pinyin(word2, style=Style.NORMAL)])
+    pinyin1 = ''.join([p[0] for p in pinyin(word1, style=Style.TONE2, neutral_tone_with_five=True)])
+    pinyin2 = ''.join([p[0] for p in pinyin(word2, style=Style.TONE2, neutral_tone_with_five=True)])
     
     soundex1 = chinese_soundex(pinyin1)
     soundex2 = chinese_soundex(pinyin2)
@@ -121,13 +121,13 @@ def fuzzy_correct_chinese(text, correct_terms):
         if word in error_correction:
             corrected_words.append(error_correction[word])
         else:
-            # 如果不在錯誤修正字典中,則使用 Soundex 方法。先以自定義字典做諧音比較
+            # 如果不在錯誤修正字典中,則使用 Soundex 方法
             for term in correct_terms:
                 if compare_chinese_words(word, term):
                     corrected_words.append(term)
                     break
-                else:
-                    corrected_words.append(word)
+            else:
+                corrected_words.append(word)
     return ' '.join(corrected_words)