Bläddra i källkod

Add urlToarticle.py

steven 2 månader sedan
förälder
incheckning
12d8b0f7e1
1 ändrade filer med 251 tillägg och 0 borttagningar
  1. 251 0
      urlToarticle.py

+ 251 - 0
urlToarticle.py

@@ -0,0 +1,251 @@
+from selenium.webdriver.common.by import By
+from selenium.webdriver.common.keys import Keys
+from selenium.webdriver.support.ui import WebDriverWait
+from selenium.webdriver.support import expected_conditions as EC
+import time
+import undetected_chromedriver as uc
+
+class ArticleGenerator:
+    def __init__(self, user_data_dir, profile_directory):
+        self.user_data_dir = user_data_dir
+        self.profile_directory = profile_directory
+        self.driver = None
+
+    def get_webdriver(self):
+        for attempt in range(3):  # 嘗試最多 3 次
+            try:
+                options = uc.ChromeOptions()
+                # options.add_argument("--disable-blink-features=AutomationControlled")
+                # options.add_experimental_option("excludeSwitches", ["enable-automation"])
+                # options.add_experimental_option("useAutomationExtension", False)
+                options.add_argument('--ignore-certificate-errors')
+                options.add_argument("--disable-gpu")
+                options.add_argument("--disable-dev-shm-usage")
+                # options.add_argument("headless")
+                options.add_argument(f"user-data-dir={self.user_data_dir}")
+                options.add_argument(f'--profile-directory={self.profile_directory}')
+                # s = Service(self.driver_path)
+                self.driver=uc.Chrome(options=options, version_main=132, use_subprocess=True)
+                self.driver.execute_script("Object.defineProperty(navigator, 'webdriver', {get: () => undefined})")
+                return self.driver
+
+            except Exception as e:
+                print(f'Error: {e}')
+                print(f"WebDriver 啟動失敗,第 {attempt + 1} 次嘗試...")
+                if attempt == 2:
+                    raise e
+                time.sleep(2)  # 等待 2 秒後重試
+
+    def generate_article(self, urls):
+        for attempt in range(3):
+            try:
+                driver = self.get_webdriver()
+                url = "https://notebooklm.google.com/"
+                driver.get(url)
+                time.sleep(3)
+                
+                # 新建專案 
+                new_created = driver.find_element(By.XPATH, '/html/body/labs-tailwind-root/div/welcome-page/div/div[2]/div[1]/div/button/span[2]')
+                while not new_created:
+                    time.sleep(0.1)
+                    new_created = driver.find_element(By.XPATH, '/html/body/labs-tailwind-root/div/welcome-page/div/div[2]/div[1]/div/button/span[2]')
+                new_created.click()
+                time.sleep(2)
+
+                # # 點擊 專案
+                # notebooks = driver.find_element(By.CLASS_NAME, 'project-buttons-flow ng-star-inserted')
+                # # 選取第一個 project
+                # new_notebook = notebooks.find_element(By.TAG_NAME, 'project-button')
+                # while not new_notebook:
+                #     time.sleep(0.1)
+                #     new_notebook = notebooks.find_element(By.TAG_NAME, 'project-button')
+                # new_notebook.click()
+
+                time.sleep(1)
+
+                # 將網址都輸入
+                for i, url in enumerate(urls):
+                    time.sleep(5)
+                    if i == 0:
+                        上傳來源 = driver.find_element(By.CSS_SELECTOR, '.mat-icon.notranslate.upload-icon.google-symbols.mat-icon-no-color')
+                        while not 上傳來源:
+                            time.sleep(0.1)
+                            上傳來源 = driver.find_element(By.CSS_SELECTOR, '.mat-icon.notranslate.upload-icon.google-symbols.mat-icon-no-color')
+                        上傳來源.click()
+                        time.sleep(3)
+                        # 找網站按鈕
+                        text_click = driver.find_element(By.XPATH, '//*[@id="mat-mdc-chip-2"]/span[2]/span/span[2]')
+                        while not text_click:
+                            time.sleep(0.1)
+                            text_click = driver.find_element(By.XPATH, '//*[@id="mat-mdc-chip-2"]/span[2]/span/span[2]')
+                        text_click.click()
+                        time.sleep(1)
+                        # 找到 textarea 並輸入網址
+                        website_input = driver.find_element(By.ID, 'mat-input-0')
+                        while not text_click:
+                            time.sleep(0.1)
+                            website_input = driver.find_element(By.ID, 'mat-input-0')
+                        website_input.send_keys(urls[0])
+                        website_input.send_keys(Keys.RETURN)
+                        print(f'成功輸入網址: {url}')
+                        time.sleep(7)
+                    else:
+                        try:
+                            WebDriverWait(driver, 20).until(
+                                EC.presence_of_element_located((By.CLASS_NAME, "mdc-button__label"))
+                            )
+                            新增來源 = driver.find_elements(By.CLASS_NAME, "mdc-button__label")[2]
+                            print(新增來源.is_displayed())
+                            while not 新增來源:
+                                time.sleep(0.1)
+                                新增來源 = driver.find_element(By.CLASS_NAME, "mdc-button__label")[2]
+                            新增來源.click()
+                        except Exception as e:
+                            print(f'Error: {e}')
+                            # 嘗試使用 JavaScript 來檢查元素是否可用
+                            driver.execute_script("return document.querySelector('button #mat-tab-group-0-content-0 > div > div > div > source-picker > div > div.ng-tns-c2551705568-5.ng-star-inserted > button > span.mat-mdc-button-persistent-ripple.mdc-button__ripple') != null")
+                        time.sleep(3)
+                        # 找網站按鈕
+                        try:
+                            # 取得整個按鈕列表
+                            clicks = driver.find_element(By.CLASS_NAME, 'chip-groups').find_elements(By.CSS_SELECTOR, '.chip-group.ng-star-inserted')
+                            print(clicks)
+                            text_click = clicks[1].find_element(By.TAG_NAME, 'mat-chip')
+                            while not text_click:
+                                time.sleep(0.1)
+                                text_click = clicks[1].find_element(By.TAG_NAME, 'mat-chip')
+                            text_click.click()
+                        except Exception as e:
+                            print(f'Error: {e}')
+                            # 如果捕獲到錯誤,使用 JavaScript 強制點擊
+                            try:
+                                text_click = clicks[1].find_element(By.TAG_NAME, 'mat-chip')
+                                driver.execute_script("arguments[0].click();", text_click)  # 使用 JS 點擊元素
+                                print("使用 JavaScript 成功點擊元素")
+                            except Exception as js_error:
+                                print(f'JavaScript 點擊錯誤: {js_error}')
+                        time.sleep(2)
+                        # 找到 textarea 並輸入網址
+                        # 取得上面幾層
+                        div = driver.find_element(By.TAG_NAME, 'website-upload')
+                        # print(div.text)
+                        website_input = div.find_element(By.TAG_NAME, 'mat-form-field').find_element(By.TAG_NAME, 'input')
+                        # 確保元素可以操作
+                        while not website_input.is_displayed() or not website_input.is_enabled():
+                            time.sleep(0.1)
+                            website_input = div.find_element(By.TAG_NAME, 'mat-form-field').find_element(By.TAG_NAME, 'input')
+                        try:
+                            website_input.send_keys(urls[0])
+                            website_input.send_keys(Keys.RETURN)
+                        except Exception as e:
+                            print(f'Error: {e}')
+                            # 使用 JavaScript 強制發送鍵盤事件
+                            driver.execute_script("arguments[0].value = arguments[1];", website_input, urls)  # 將文字輸入到 input
+                            driver.execute_script("arguments[0].dispatchEvent(new Event('input'));", website_input)  # 觸發 input 事件
+                        print(f'成功輸入網址: {url}')
+                        time.sleep(7)            
+                    
+                time.sleep(5)
+
+                # 上面幾層
+                omnibar = driver.find_element(By.TAG_NAME, 'chat-panel').find_element(By.TAG_NAME, 'omnibar')
+                box = omnibar.find_element(By.TAG_NAME, 'query-box')
+                prompt_input = box.find_element(By.TAG_NAME, 'textarea')
+                while not prompt_input.is_displayed() or not prompt_input.is_enabled():
+                    time.sleep(0.1)
+                    prompt_input = box.find_element(By.TAG_NAME, 'textarea')
+                
+                j = 1
+                fore_content = ''
+                while '##' not in fore_content:
+                    # 輸入生成文章的 prompt
+                    complete_prompt = "1. 彙整並生成一篇以常見問題為主的專業文章(給客戶看的,最後一句「希望這篇文章能解答...」整句直接刪掉)。2. 文章必須為 MarkDown 格式。3. 在各個分類或重要段落中加入一些具體的案例或數據。4. 必須使用段落格式,不要條列式。5. 若內容不是中文,要精準翻譯成中文。6. 生成 MarkDown 的 md 檔(須包含大標題及各個段落的標題)。"
+                    try:
+                        prompt_input.send_keys(complete_prompt)
+                        prompt_input.send_keys(Keys.RETURN)
+                    except Exception as e:
+                        print(f'Error: {e}')
+                        # 使用 JavaScript 強制發送鍵盤事件
+                        driver.execute_script("arguments[0].value = arguments[1];", prompt_input, complete_prompt)  # 將文字輸入到 input
+                        driver.execute_script("arguments[0].dispatchEvent(new Event('input'));", prompt_input)  # 觸發 input 事件
+
+
+                    print('等候答案')
+                    time.sleep(45)  # 等待答案生成
+                    message = driver.find_element(By.TAG_NAME, 'chat-panel').find_elements(By.TAG_NAME, 'chat-message')[j]
+                    answers = message.find_elements(By.TAG_NAME, 'labs-tailwind-structural-element-view-v2')
+                    while not answers[1].is_displayed() or not answers[1].is_enabled():
+                        time.sleep(0.1)
+                        answers = message.find_elements(By.TAG_NAME, 'labs-tailwind-structural-element-view-v2')
+                    fore_content = message.text
+                    j+=2
+                
+                # 直接生成 md 檔 或是 生成文字?
+                texts = []
+                for i, answer in enumerate(answers[1:]):
+                    if '###' not in fore_content:
+                        if i == 0:
+                            texts.append('# ' + answer.text.strip())
+                        elif i % 2 == 1 and i != len(answers[1:])-1:
+                            texts.append('## ' + answer.text.strip())
+                        elif i == len(answers[1:])-1:
+                            continue
+                        else:  
+                            texts.append(answer.text.strip())
+                    else:
+                        if '# # ' in answer.text:
+                            t = answer.text.replace('# # ', '# ').strip()
+                            texts.append(t)
+                        else:
+                            texts.append(answer.text.strip())
+
+                print(texts)
+                if len(texts) > 1:
+                    content = '\n'.join(texts)
+                else:
+                    content = texts[0].replace('# # ', '# ')
+                print(content)
+                is_succesed = self.save_article_as_md(content)
+                if is_succesed:
+                    return 'article.md 生成成功'
+                else:
+                    return '存檔失敗'
+            except Exception as e:
+                print("出現錯誤: ", str(e))
+                return False
+
+        if self.driver:
+            time.sleep(5)
+            self.driver.quit()
+    
+    def save_article_as_md(self, content, filename="./article.md"):
+        # 打開或創建一個 .md 文件
+        try:
+            with open(filename, 'w+', encoding='utf-8-sig') as file:
+                # 將文章內容寫入文件
+                file.write(content)
+                print(f"文章已成功保存為 {filename}")
+            return True
+        except Exception as e:
+            print(f"保存文章時發生錯誤: {str(e)}")
+            return False
+
+# 假設生成的文章內容如下(這會是你的生成內容)
+# 測試區域(僅在此模組直接執行時執行)
+if __name__ == "__main__":
+    # driver_path = 'chromedriver-win32/chromedriver.exe'
+    user_data_dir = 'C:/Users/wangy/AppData/Local/Google/Chrome/User Data'
+    profile_directory = 'Profile 20'
+
+    # 範例用法
+    article_generator = ArticleGenerator(user_data_dir, profile_directory)
+    urls = [
+        "https://www.virtualhumans.org/",
+        "https://en.wikipedia.org/wiki/Virtual_human",
+        "https://www.d-id.com/resources/glossary/virtual-humans/"
+    ]
+
+    is_succesed = article_generator.generate_article(urls)
+    if is_succesed:
+        print(is_succesed)
+