1 年間前 · 9bfb5803ac
--- a/requirements.txt
+++ b/requirements.txt
@@ -0,0 +1,152 @@
 
				+aiohappyeyeballs==2.4.0
			
 
				+aiohttp==3.10.5
			
 
				+aiosignal==1.3.1
			
 
				+annotated-types==0.7.0
			
 
				+anyio==4.4.0
			
 
				+attrs==24.2.0
			
 
				+audioread==3.0.1
			
 
				+babel==2.16.0
			
 
				+bce-python-sdk==0.9.19
			
 
				+blinker==1.8.2
			
 
				+Bottleneck==1.4.0
			
 
				+certifi==2024.7.4
			
 
				+cffi==1.17.0
			
 
				+charset-normalizer==3.3.2
			
 
				+click==8.1.7
			
 
				+colorama==0.4.6
			
 
				+coloredlogs==15.0.1
			
 
				+colorlog==6.8.2
			
 
				+contourpy==1.3.0
			
 
				+cycler==0.12.1
			
 
				+Cython==3.0.11
			
 
				+datasets==2.21.0
			
 
				+decorator==5.1.1
			
 
				+dill==0.3.4
			
 
				+Distance==0.1.3
			
 
				+distro==1.9.0
			
 
				+editdistance==0.8.1
			
 
				+fastapi==0.112.2
			
 
				+filelock==3.15.4
			
 
				+Flask==3.0.3
			
 
				+flask-babel==4.0.0
			
 
				+flatbuffers==24.3.25
			
 
				+fonttools==4.53.1
			
 
				+frozenlist==1.4.1
			
 
				+fsspec==2024.6.1
			
 
				+future==1.0.0
			
 
				+fuzzywuzzy==0.18.0
			
 
				+g2p-en==2.1.0
			
 
				+g2pM==0.1.2.5
			
 
				+h11==0.14.0
			
 
				+h5py==3.11.0
			
 
				+httpcore==1.0.5
			
 
				+httpx==0.27.2
			
 
				+huggingface-hub==0.24.6
			
 
				+humanfriendly==10.0
			
 
				+idna==3.8
			
 
				+inflect==7.3.1
			
 
				+itsdangerous==2.2.0
			
 
				+jieba==0.42.1
			
 
				+Jinja2==3.1.4
			
 
				+jiter==0.5.0
			
 
				+joblib==1.4.2
			
 
				+jsonlines==4.0.0
			
 
				+kaldiio==2.18.0
			
 
				+kiwisolver==1.4.5
			
 
				+Levenshtein==0.25.1
			
 
				+librosa==0.8.1
			
 
				+llvmlite==0.43.0
			
 
				+loguru==0.7.2
			
 
				+lxml==5.3.0
			
 
				+markdown-it-py==3.0.0
			
 
				+MarkupSafe==2.1.5
			
 
				+matplotlib==3.9.2
			
 
				+mdurl==0.1.2
			
 
				+mock==5.1.0
			
 
				+more-itertools==10.4.0
			
 
				+mpmath==1.3.0
			
 
				+multidict==6.0.5
			
 
				+multiprocess==0.70.12.2
			
 
				+nara-wpe==0.0.10
			
 
				+nltk==3.9.1
			
 
				+numba==0.60.0
			
 
				+numpy==2.0.2
			
 
				+onnx==1.16.2
			
 
				+onnxruntime==1.19.0
			
 
				+openai==1.42.0
			
 
				+packaging==24.1
			
 
				+paddle2onnx==0.8.1
			
 
				+paddleaudio==1.0.2
			
 
				+paddlefsl==1.1.0
			
 
				+paddlenlp==2.6.1
			
 
				+paddlespeech==1.0.1
			
 
				+paddlespeech-feat==0.1.0
			
 
				+pandas==2.2.2
			
 
				+pathos==0.2.8
			
 
				+pattern_singleton==1.2.0
			
 
				+pillow==10.4.0
			
 
				+pip-autoremove==0.10.0
			
 
				+platformdirs==4.2.2
			
 
				+pooch==1.8.2
			
 
				+portalocker==2.10.1
			
 
				+pox==0.3.4
			
 
				+ppft==1.7.6.8
			
 
				+praatio==5.0.0
			
 
				+prettytable==3.11.0
			
 
				+protobuf==3.20.2
			
 
				+psutil==6.0.0
			
 
				+pyarrow==17.0.0
			
 
				+pycparser==2.22
			
 
				+pycryptodome==3.20.0
			
 
				+pydantic==2.8.2
			
 
				+pydantic_core==2.20.1
			
 
				+Pygments==2.18.0
			
 
				+pyparsing==3.1.4
			
 
				+pypinyin==0.52.0
			
 
				+pypinyin-dict==0.8.0
			
 
				+pytest-runner==6.0.1
			
 
				+python-dateutil==2.9.0.post0
			
 
				+python-dotenv==1.0.1
			
 
				+python-Levenshtein==0.25.1
			
 
				+pytz==2024.1
			
 
				+pyworld==0.3.4
			
 
				+PyYAML==6.0.2
			
 
				+rapidfuzz==3.9.6
			
 
				+rarfile==4.2
			
 
				+regex==2024.7.24
			
 
				+requests==2.32.3
			
 
				+resampy==0.2.2
			
 
				+rich==13.8.0
			
 
				+sacrebleu==2.4.3
			
 
				+safetensors==0.4.4
			
 
				+scikit-learn==1.5.1
			
 
				+scipy==1.14.1
			
 
				+sentencepiece==0.1.99
			
 
				+seqeval==1.2.2
			
 
				+shellingham==1.5.4
			
 
				+six==1.16.0
			
 
				+sniffio==1.3.1
			
 
				+soundfile==0.12.1
			
 
				+starlette==0.38.2
			
 
				+sympy==1.13.2
			
 
				+tabulate==0.9.0
			
 
				+TextGrid==1.6.1
			
 
				+threadpoolctl==3.5.0
			
 
				+tiktoken==0.7.0
			
 
				+timer==0.3.0
			
 
				+tqdm==4.66.5
			
 
				+typeguard==4.3.0
			
 
				+typer==0.12.5
			
 
				+typing_extensions==4.12.2
			
 
				+tzdata==2024.1
			
 
				+urllib3==2.2.2
			
 
				+uvicorn==0.30.6
			
 
				+visualdl==2.5.3
			
 
				+wcwidth==0.2.13
			
 
				+webrtcvad==2.0.10
			
 
				+websockets==13.0.1
			
 
				+Werkzeug==3.0.4
			
 
				+xxhash==3.5.0
			
 
				+yacs==0.1.8
			
 
				+yarl==1.9.4
			
 
				+zhon==2.0.2
			
--- a/src/audio_processing.py
+++ b/src/audio_processing.py
@@ -0,0 +1,58 @@
 
				+from openai import OpenAI
			
 
				+from config import SYSTEM_PROMPT, OPEN_API_KEY, SUPABASE_KEY, SUPABASE_URL
			
 
				+from supabase import create_client, Client
			
 
				+from text_processing import fuzzy_correct_chinese
			
 
				+import csv
			
 
				+
			
 
# OpenAI client used by the functions below for audio transcription and
# chat-based proofreading.
client = OpenAI(api_key=OPEN_API_KEY)
# Supabase client, created at import time. It is not referenced by any other
# code visible in this module.
supabase: Client = create_client(SUPABASE_URL, SUPABASE_KEY)
			
 
				+
			
 
				+def load_custom_vocab_from_csv(file_path):
			
 
				+    custom_vocab = []
			
 
				+    try:
			
 
				+        with open(file_path, 'r', encoding='utf-8') as csvfile:
			
 
				+            reader = csv.DictReader(csvfile)
			
 
				+            custom_vocab = [row['brand'] for row in reader if 'brand' in row]
			
 
				+    except Exception as e:
			
 
				+        print(f"Error reading CSV file: {str(e)}")
			
 
				+        print("Using empty vocabulary.")
			
 
				+    return custom_vocab
			
 
				+
			
 
				+def transcribe(audio_file):
			
 
				+    try:
			
 
				+        custom_vocab = load_custom_vocab_from_csv('brand_database_rows.csv')
			
 
				+        
			
 
				+        transcript = client.audio.transcriptions.create(
			
 
				+            file=audio_file,
			
 
				+            model="whisper-1",
			
 
				+            response_format="text", 
			
 
				+            prompt=f"請注意以下詞彙：{custom_vocab}"
			
 
				+        )
			
 
				+        return transcript
			
 
				+    except Exception as e:
			
 
				+        print(f"轉錄時發生錯誤：{str(e)}")
			
 
				+        return None
			
 
				+
			
 
				+def post_process_transcript(transcript, temperature=0):
			
 
				+    corrected_transcript = fuzzy_correct_chinese(transcript)
			
 
				+    
			
 
				+    messages = [
			
 
				+        {"role": "system", "content": SYSTEM_PROMPT},
			
 
				+        {"role": "user", "content": f"請校對並修正以下轉錄文本，但不要改變其原意或回答問題，也不要更動英文的大小寫：\n\n{corrected_transcript}"}
			
 
				+    ]
			
 
				+
			
 
				+    response = client.chat.completions.create(
			
 
				+        model="gpt-4",
			
 
				+        temperature=temperature,
			
 
				+        messages=messages
			
 
				+    )
			
 
				+
			
 
				+    return response.choices[0].message.content
			
 
				+
			
 
				+def process_audio(audio_data):
			
 
				+    raw_transcript = transcribe(audio_data)
			
 
				+    print(raw_transcript)
			
 
				+    if raw_transcript is None:
			
 
				+        return None, None
			
 
				+    corrected_transcript = post_process_transcript(raw_transcript)
			
 
				+    return raw_transcript, corrected_transcript
			
--- a/src/brand_database_rows.csv
+++ b/src/brand_database_rows.csv
@@ -0,0 +1,161 @@
 
				+id,brand,category

			
 
				+1,BALENCIAGA,

			
 
				+2,BOSS,

			
 
				+3,BURBERRY,

			
 
				+4,CELINE,

			
 
				+5,COS,

			
 
				+6,COACH,

			
 
				+7,Dior,

			
 
				+8,FENDI,

			
 
				+9,GUCCI,

			
 
				+10,KENZO,

			
 
				+11,Louis Vuitton,

			
 
				+12,LV,

			
 
				+13,MONTBLANC,

			
 
				+14,POLO,

			
 
				+15,Tory Burch,

			
 
				+16,VERSACE,

			
 
				+17,BAO BAO ISSEY MIYAKE,

			
 
				+18,Berluti,

			
 
				+19,BOTTEGA VENETA,

			
 
				+20,ZEGNA,

			
 
				+21,FERRAGAMO,

			
 
				+22,LONGCHAMP,

			
 
				+23,Loro Piana,

			
 
				+24,maje,

			
 
				+25,MICHAEL KORS,

			
 
				+26,Moncler,

			
 
				+27,PLEATS PLEASE,

			
 
				+28,SAINT LAURENT,

			
 
				+29,A. Lange & Söhne,

			
 
				+30,BLANCPAIN,

			
 
				+31,BOUCHERON,

			
 
				+32,BREGUET,

			
 
				+33,BREITLING,

			
 
				+34,BVLGARI,

			
 
				+35,Cartier,

			
 
				+36,CHANEL,

			
 
				+37,CHAUMET,

			
 
				+38,CHOPARD,

			
 
				+39,DAMIANI,

			
 
				+40,DE BEERS,

			
 
				+41,FRED,

			
 
				+42,HARRY WINSTON,

			
 
				+43,Grand Seiko,

			
 
				+44,HUBLOT,

			
 
				+45,IWC,

			
 
				+46,JADEGIA,

			
 
				+47,玉世家,

			
 
				+48,JAEGER-LECOULTRE,

			
 
				+49,LONGINES,

			
 
				+50,MIKIMOTO,

			
 
				+51,OMEGA,

			
 
				+52,歐米茄,

			
 
				+53,PANERAI,

			
 
				+54,PATEK PHILIPPE,

			
 
				+55,PIAGET,

			
 
				+56,RADO,

			
 
				+57,ROGER DUBUIS,

			
 
				+58,Rolex,

			
 
				+59,勞力士,

			
 
				+60,Sincere Haute Horlogerie,

			
 
				+61,TAG Heuer,

			
 
				+62,Tiffany & Co.,

			
 
				+63,TISSOT,

			
 
				+64,TUDOR,

			
 
				+65,帝舵表,

			
 
				+66,VACHERON CONSTANTIN,

			
 
				+67,Van Cleef & Arpels,

			
 
				+68,PEDRO,

			
 
				+69,2020EYEhaus,

			
 
				+70,APM MONACO,

			
 
				+71,BAO BAO ISSEY MIYAKE,

			
 
				+72,CHARLES & KEITH,

			
 
				+73,HOGAN,

			
 
				+74,KANGOL,

			
 
				+75,MIRROR,

			
 
				+76,皇宣緣,

			
 
				+77,PANDORA,

			
 
				+78,Redline,

			
 
				+79,SWAROVSKI,

			
 
				+80,SWATCH,

			
 
				+81,The Way Eyewear,

			
 
				+82,TUMI,

			
 
				+83,vacanza,

			
 
				+84,A|X ARMANI EXCHANGE,

			
 
				+85,adidas SWC,

			
 
				+86,Benetton,

			
 
				+87,Brooks Brothers,

			
 
				+88,CALVIN KLEIN JEANS,

			
 
				+89,Crocs,

			
 
				+91,iROO,

			
 
				+92,LULULEMON,

			
 
				+93,Massimo Dutti,

			
 
				+94,MLB,

			
 
				+95,NB GREY Image Main Store,

			
 
				+96,NB GREY,

			
 
				+97,NIKE KICKS LOUNGE,

			
 
				+98,Onitsuka Tiger,

			
 
				+99,鬼塚虎,

			
 
				+100,PEDRO,

			
 
				+101,PORTER INTERNATIONAL,

			
 
				+102,ROOTS,

			
 
				+103,THE NORTH FACE,

			
 
				+104,THE NORTH FACE Taipei 101 store,

			
 
				+105,TOMMY HILFIGER,

			
 
				+106,ZARA,

			
 
				+107,ABC Cooking Studio,

			
 
				+108,Apple Taipei 101,

			
 
				+109,Apple,

			
 
				+110,Bang & Olufsen,

			
 
				+111,Devialet,

			
 
				+112,FamilyMart,

			
 
				+113,LAMY,

			
 
				+114,Sony,

			
 
				+115,Sony 台北 101 直營店,

			
 
				+116,Taipei 101 Observation Deck,

			
 
				+117,Taipei Fubon Bank,

			
 
				+118,World Gym Elite,

			
 
				+119,台北101觀景台,

			
 
				+120,台北富邦銀行,

			
 
				+121,全家便利商店,

			
 
				+122,Aesop,

			
 
				+123,Chanel Beauty,

			
 
				+124,CREED,

			
 
				+125,Dior Beauty,

			
 
				+126,JO MALONE LONDON,

			
 
				+127,SISLEY,

			
 
				+128,Yves Saint Laurent Beauté,

			
 
				+129,SwissKubiK,

			
 
				+130,adidas,

			
 
				+131,CHANEL WATCH STORE,

			
 
				+132,寶格麗,

			
 
				+133,天梭表,

			
 
				+134,瑞士雷達表,

			
 
				+135,卡地亞台北旗艦店,

			
 
				+136,香奈兒腕錶專門店,

			
 
				+137,百年靈,

			
 
				+138,浪琴表,

			
 
				+139,香奈兒,

			
 
				+140,ACERA,

			
 
				+141,乾唐軒,

			
 
				+142,AMBI SPACE ONE,

			
 
				+143,The one at Taipei 101,

			
 
				+144,Toyama Xiangtang,

			
 
				+145,富山香堂,

			
 
				+146,FushanKodo,

			
 
				+147,sugarfina,

			
 
				+148,MK,

			
 
				+149,YSL,

			
 
				+150,HW,

			
 
				+151,GS,

			
 
				+152,PP,

			
 
				+153,SHH,

			
 
				+154,VC,

			
 
				+155,VCA,

			
 
				+157,BV,

			
 
				+156,C&K,

			
 
				+158,CK,

			
 
				+90,GIORDANO LADIES,

			
 
				+159,new balance,

			
 
				+160,YSL Beauté,
			
--- a/src/config.py
+++ b/src/config.py
@@ -0,0 +1,34 @@
 
				+import os
			
 
				+from dotenv import load_dotenv
			
 
				+
			
 
				+load_dotenv()
			
 
				+
			
 
				+SUPABASE_URL: str =  os.environ.get('SUPABASE_URL')
			
 
				+SUPABASE_KEY: str = os.environ.get('SUPABASE_KEY')
			
 
				+OPEN_API_KEY: str = os.environ.get('OPENAI_API_KEY')
			
 
				+
			
 
				+if not SUPABASE_URL or not SUPABASE_KEY:
			
 
				+    raise ValueError("SUPABASE_URL and SUPABASE_KEY must be set in the .env file")
			
 
				+
			
 
				+
			
 
# System prompt (Traditional Chinese) for the transcript proofreading model.
# It tells the model to act as a proofreader for brand-related conversation
# transcripts: keep Chinese/English brand names and their letter case intact,
# add punctuation where needed, use Taiwanese Traditional Chinese or English,
# fix only obvious errors, never answer questions or add information, and
# keep questions as questions.
SYSTEM_PROMPT = """你是一位專業的轉錄校對助理，專門處理有關品牌的對話轉錄。
你的任務是：
1. 確保中英文品牌的正確性，大小寫不要更動。
2. 在必要時添加適當的標點符號，如句號、逗號
3. 使用台灣的繁體中文或英文，確保語言表達符合台灣的用語習慣。
4. 只更正明顯的錯誤或改善可讀性，不要改變原文的意思或結構。
5. 不要回答問題、解釋概念或添加任何不在原文中的信息。
6. 如果原文是一個問句，保持它的問句形式，不要提供答案。

請只根據提供的原文進行必要的更正，不要添加或刪除任何實質性內容。在修正時，請特別注意上下文，確保修正後的詞語符合整句話的語境。"""
			
 
				+
			
 
				+CORRECT_TERMS = [
			
 
				+    "品牌", "101", "一零一"
			
 
				+]
			
 
				+
			
 
				+ERROR_CORRECTION = {
			
 
				+    "庫治": "cucci",
			
 
				+    "Coles": "cos",
			
 
				+    "粉絲": "versace",
			
 
				+    "St. Lawrence": "saint laurent"
			
 
				+}
			
--- a/src/main_script.py
+++ b/src/main_script.py
@@ -0,0 +1,67 @@
 
				+import sys
			
 
				+from dictionary_loader import load_word_database_dictionary_from_supabase
			
 
				+from audio_processing import process_audio
			
 
				+
			
 
				+def initialize():
			
 
				+    word_database_success = load_word_database_dictionary_from_supabase()
			
 
				+    if not word_database_success:
			
 
				+        print("Warning: Word Database Dictionary loading failed. Proceeding with default dictionary.")
			
 
				+
			
 
				+
			
 
				+def process_audio_file(audio_file):
			
 
				+    try:
			
 
				+        result = process_audio(audio_file)
			
 
				+        if isinstance(result, tuple) and len(result) == 2:
			
 
				+            return result
			
 
				+        else:
			
 
				+            print("Unexpected result from process_audio")
			
 
				+            return None, None
			
 
				+    except Exception as e:
			
 
				+        print(f"Error processing audio: {str(e)}")
			
 
				+        return None, None
			
 
				+    
			
 
				+# 加入檢查user是否詢問特定問題
			
 
				+def main(audio_file_path):
			
 
				+    initialize()
			
 
				+    try:
			
 
				+        with open(audio_file_path, "rb") as audio_file:
			
 
				+            raw_transcript, corrected_transcript = process_audio_file(audio_file)
			
 
				+        
			
 
				+        if raw_transcript and corrected_transcript:
			
 
				+            print(f"Raw transcript: {raw_transcript}")
			
 
				+            print(f"Corrected transcript: {corrected_transcript}")
			
 
				+            
			
 
				+        else:
			
 
				+            print("Audio processing failed.")
			
 
				+    except FileNotFoundError:
			
 
				+        print(f"Error: The file '{audio_file_path}' was not found.")
			
 
				+    except Exception as e:
			
 
				+        print(f"An unexpected error occurred: {str(e)}")
			
 
				+        import traceback
			
 
				+        traceback.print_exc()
			
 
				+
			
 
				+
			
 
				+## Original version of main (kept for reference)
			
 
				+# def main(audio_file_path):
			
 
				+#     initialize()
			
 
				+#     try:
			
 
				+#         with open(audio_file_path, "rb") as audio_file:
			
 
				+#             result = process_audio_file(audio_file)
			
 
				+        
			
 
				+#         if result:
			
 
				+#             print(result)
			
 
				+#         else:
			
 
				+#             print("Audio processing failed.")
			
 
				+#     except FileNotFoundError:
			
 
				+#         print(f"Error: The file '{audio_file_path}' was not found.")
			
 
				+#     except Exception as e:
			
 
				+#         print(f"An unexpected error occurred: {str(e)}")
			
 
				+
			
 
				+
			
 
				+if __name__ == "__main__":
			
 
				+    if len(sys.argv) != 2:
			
 
				+        print("Usage: python script_name.py <audio_file_path>")
			
 
				+        sys.exit(1)
			
 
				+    
			
 
				+    audio_file_path = sys.argv[1]
			
 
				+    main(audio_file_path)
			
--- a/src/text_processing.py
+++ b/src/text_processing.py
@@ -0,0 +1,71 @@
 
				+import jieba
			
 
				+from pypinyin import pinyin, Style
			
 
				+from config import CORRECT_TERMS, ERROR_CORRECTION
			
 
				+
			
 
				+def chinese_soundex(pinyin_str):
			
 
				+    soundex_map = {
			
 
				+        'b': '1', 'p': '1', 'm': '1', 'f': '1',
			
 
				+        'd': '2', 'n': '2', 'l': '2',
			
 
				+        'g': '3', 'k': '3', 'h': '3', 't': '3',
			
 
				+        'j': '4', 'q': '4', 'x': '4',
			
 
				+        'zh': '5', 'ch': '5', 'sh': '5', 'r': '5',
			
 
				+        'z': '6', 'c': '6', 's': '6',
			
 
				+        'an': '7', 'ang': '7', 'en': '8', 'eng': '8', 'in': '8', 'ing': '8',
			
 
				+        'ong': '9', 'un': '9', 'uan': '9',
			
 
				+        'i': 'A', 'u': 'A', 'v': 'A',
			
 
				+        'e': 'B', 'o': 'B',
			
 
				+    }
			
 
				+    
			
 
				+    code = ''
			
 
				+    tone = '0'
			
 
				+    i = 0
			
 
				+    while i < len(pinyin_str):
			
 
				+        if pinyin_str[i:i+2] in soundex_map:
			
 
				+            code += soundex_map[pinyin_str[i:i+2]]
			
 
				+            i += 2
			
 
				+        elif pinyin_str[i] in soundex_map:
			
 
				+            code += soundex_map[pinyin_str[i]]
			
 
				+            i += 1
			
 
				+        elif pinyin_str[i].isdigit():
			
 
				+            tone = pinyin_str[i]
			
 
				+            i += 1
			
 
				+        else:
			
 
				+            i += 1
			
 
				+    
			
 
				+    code = code[:1] + ''.join(sorted(set(code[1:])))
			
 
				+    return (code[:3] + tone).ljust(4, '0')
			
 
				+
			
 
				+def compare_chinese_words(word1, word2, tone_sensitive=True):
			
 
				+    pinyin1 = ''.join([p[0] for p in pinyin(word1, style=Style.TONE3, neutral_tone_with_five=True)])
			
 
				+    pinyin2 = ''.join([p[0] for p in pinyin(word2, style=Style.TONE3, neutral_tone_with_five=True)])
			
 
				+    
			
 
				+    soundex1 = chinese_soundex(pinyin1)
			
 
				+    # print(soundex1)
			
 
				+    soundex2 = chinese_soundex(pinyin2)
			
 
				+    # print('soundex2', soundex2)
			
 
				+
			
 
				+    if tone_sensitive:
			
 
				+        return soundex1 == soundex2
			
 
				+    else:
			
 
				+        return soundex1[:3] == soundex2[:3]
			
 
				+
			
 
				+def fuzzy_correct_chinese(text):
			
 
				+    words = jieba.lcut(text)
			
 
				+    corrected_words = []
			
 
				+    for word in words:
			
 
				+        if word.isalpha():
			
 
				+            corrected_words.append(word)
			
 
				+            continue
			
 
				+        word_pinyin = ''.join([p[0] for p in pinyin(word, style=Style.NORMAL)])
			
 
				+        # print(f"Term: {word}, Pinyin: {word_pinyin}")
			
 
				+        if word in ERROR_CORRECTION:
			
 
				+            corrected_words.append(ERROR_CORRECTION[word])
			
 
				+        else:
			
 
				+            for term in CORRECT_TERMS:
			
 
				+                if compare_chinese_words(word, term, tone_sensitive=True):
			
 
				+                    # print(f"corrected: {word} -> {term}")
			
 
				+                    corrected_words.append(term)
			
 
				+                    break
			
 
				+            else:
			
 
				+                corrected_words.append(word)
			
 
				+    return ''.join(corrected_words)