|
@@ -97,8 +97,8 @@ def chinese_soundex(pinyin):
|
|
|
|
|
|
|
|
|
def compare_chinese_words(word1, word2):
|
|
|
- pinyin1 = ''.join([p[0] for p in pinyin(word1, style=Style.NORMAL)])
|
|
|
- pinyin2 = ''.join([p[0] for p in pinyin(word2, style=Style.NORMAL)])
|
|
|
+ pinyin1 = ''.join([p[0] for p in pinyin(word1, style=Style.TONE2, neutral_tone_with_five=True)])
|
|
|
+ pinyin2 = ''.join([p[0] for p in pinyin(word2, style=Style.TONE2, neutral_tone_with_five=True)])
|
|
|
|
|
|
soundex1 = chinese_soundex(pinyin1)
|
|
|
soundex2 = chinese_soundex(pinyin2)
|
|
@@ -121,13 +121,13 @@ def fuzzy_correct_chinese(text, correct_terms):
|
|
|
if word in error_correction:
|
|
|
corrected_words.append(error_correction[word])
|
|
|
else:
|
|
|
- # 如果不在錯誤修正字典中,則使用 Soundex 方法。先以自定義字典做諧音比較
|
|
|
+ # 如果不在錯誤修正字典中,則使用 Soundex 方法
|
|
|
for term in correct_terms:
|
|
|
if compare_chinese_words(word, term):
|
|
|
corrected_words.append(term)
|
|
|
break
|
|
|
- else:
|
|
|
- corrected_words.append(word)
|
|
|
+ else:
|
|
|
+ corrected_words.append(word)
|
|
|
return ' '.join(corrected_words)
|
|
|
|
|
|
|