|
@@ -0,0 +1,251 @@
|
|
|
import time

import undetected_chromedriver as uc
from selenium.common.exceptions import StaleElementReferenceException, WebDriverException
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait
|
|
|
+
|
|
|
class ArticleGenerator:
    """Drive the NotebookLM web UI through Chrome to build a Markdown article.

    The generator opens NotebookLM with an existing Chrome profile, creates a
    notebook, adds each URL as a website source, asks the chat panel for an
    article and writes the reply to a ``.md`` file.

    All element locators below mirror the page structure the original script
    was written against; they are brittle by nature and will need updating
    whenever the NotebookLM front end changes.
    """

    NOTEBOOKLM_URL = "https://notebooklm.google.com/"

    # Locator strings only (no selenium names here, so the class body can be
    # imported without selenium being evaluated at definition time).
    NEW_NOTEBOOK_XPATH = '/html/body/labs-tailwind-root/div/welcome-page/div/div[2]/div[1]/div/button/span[2]'
    UPLOAD_ICON_CSS = '.mat-icon.notranslate.upload-icon.google-symbols.mat-icon-no-color'
    FIRST_WEBSITE_CHIP_XPATH = '//*[@id="mat-mdc-chip-2"]/span[2]/span/span[2]'
    FIRST_WEBSITE_INPUT_ID = 'mat-input-0'
    ANSWER_PART_TAG = 'labs-tailwind-structural-element-view-v2'

    # Prompt sent to the chat panel; kept verbatim from the original script.
    ARTICLE_PROMPT = "1. 彙整並生成一篇以常見問題為主的專業文章(給客戶看的,最後一句「希望這篇文章能解答...」整句直接刪掉)。2. 文章必須為 MarkDown 格式。3. 在各個分類或重要段落中加入一些具體的案例或數據。4. 必須使用段落格式,不要條列式。5. 若內容不是中文,要精準翻譯成中文。6. 生成 MarkDown 的 md 檔(須包含大標題及各個段落的標題)。"

    LAUNCH_ATTEMPTS = 3      # how many times get_webdriver() tries to start Chrome
    ELEMENT_TIMEOUT = 20     # seconds to wait for a page element to become usable
    ANSWER_WAIT_SECONDS = 45  # fixed pause while NotebookLM generates a reply
    MAX_PROMPT_ROUNDS = 5    # upper bound on re-prompting when no '##' heading appears

    def __init__(self, user_data_dir, profile_directory, version_main=132):
        """Remember which Chrome profile to use; no browser is started here.

        Args:
            user_data_dir: Chrome "User Data" directory passed as ``user-data-dir``.
            profile_directory: Profile folder name passed as ``--profile-directory``.
            version_main: Major Chrome version handed to undetected_chromedriver.
        """
        self.user_data_dir = user_data_dir
        self.profile_directory = profile_directory
        self.version_main = version_main
        self.driver = None

    # ------------------------------------------------------------------
    # Browser lifecycle
    # ------------------------------------------------------------------
    def get_webdriver(self):
        """Start Chrome via undetected_chromedriver, retrying on failure.

        Returns:
            The started driver, also stored on ``self.driver``.

        Raises:
            Exception: whatever the final launch attempt raised.
        """
        final_attempt = self.LAUNCH_ATTEMPTS - 1
        for attempt in range(self.LAUNCH_ATTEMPTS):
            try:
                options = uc.ChromeOptions()
                options.add_argument('--ignore-certificate-errors')
                options.add_argument("--disable-gpu")
                options.add_argument("--disable-dev-shm-usage")
                options.add_argument(f"user-data-dir={self.user_data_dir}")
                options.add_argument(f'--profile-directory={self.profile_directory}')
                self.driver = uc.Chrome(
                    options=options,
                    version_main=self.version_main,
                    use_subprocess=True,
                )
                # Make navigator.webdriver read as undefined on the current page.
                self.driver.execute_script(
                    "Object.defineProperty(navigator, 'webdriver', {get: () => undefined})"
                )
                return self.driver
            except Exception as e:
                print(f'Error: {e}')
                print(f"WebDriver 啟動失敗,第 {attempt + 1} 次嘗試...")
                # A browser may have started before the failure; do not leak it.
                self._quit_driver()
                if attempt == final_attempt:
                    raise
                time.sleep(2)  # brief pause before the next launch attempt

    def _quit_driver(self):
        """Close the browser if one is open; best effort, never raises."""
        if self.driver is None:
            return
        try:
            self.driver.quit()
        except Exception as e:
            print(f'Error: {e}')
        finally:
            self.driver = None

    # ------------------------------------------------------------------
    # Waiting helpers
    # ------------------------------------------------------------------
    def _poll(self, driver, probe):
        """Call ``probe()`` every 0.1 s until it returns a truthy value.

        Missing elements, stale elements and out-of-range indexes are treated
        as "not ready yet". Raises selenium's TimeoutException after
        ``ELEMENT_TIMEOUT`` seconds.
        """
        waiter = WebDriverWait(
            driver,
            self.ELEMENT_TIMEOUT,
            poll_frequency=0.1,
            ignored_exceptions=(StaleElementReferenceException, IndexError),
        )
        return waiter.until(lambda _driver: probe())

    def _wait_for_element(self, driver, locate):
        """Return the element produced by ``locate()`` once it is displayed and enabled."""
        def probe():
            element = locate()
            if element.is_displayed() and element.is_enabled():
                return element
            return False

        return self._poll(driver, probe)

    def _type_and_submit(self, driver, field, text):
        """Type ``text`` into ``field`` and press Enter.

        If the keyboard route fails, fall back to setting the value through
        JavaScript and firing an ``input`` event. NOTE(review): the fallback
        only fills the field, it does not press Enter - same as the original.
        """
        try:
            field.send_keys(text)
            field.send_keys(Keys.RETURN)
        except WebDriverException as e:
            print(f'Error: {e}')
            driver.execute_script("arguments[0].value = arguments[1];", field, text)
            driver.execute_script("arguments[0].dispatchEvent(new Event('input'));", field)

    # ------------------------------------------------------------------
    # Page steps
    # ------------------------------------------------------------------
    def _create_notebook(self, driver):
        """Click the "new notebook" button on the welcome page."""
        button = self._wait_for_element(
            driver, lambda: driver.find_element(By.XPATH, self.NEW_NOTEBOOK_XPATH)
        )
        button.click()
        time.sleep(3)  # let the new notebook page settle

    def _add_first_source(self, driver, url):
        """Add ``url`` through the upload dialog used for the first source."""
        upload_icon = self._wait_for_element(
            driver, lambda: driver.find_element(By.CSS_SELECTOR, self.UPLOAD_ICON_CSS)
        )
        upload_icon.click()
        time.sleep(3)

        website_chip = self._wait_for_element(
            driver, lambda: driver.find_element(By.XPATH, self.FIRST_WEBSITE_CHIP_XPATH)
        )
        website_chip.click()
        time.sleep(1)

        website_input = self._wait_for_element(
            driver, lambda: driver.find_element(By.ID, self.FIRST_WEBSITE_INPUT_ID)
        )
        self._type_and_submit(driver, website_input, url)

    def _add_extra_source(self, driver, url):
        """Add ``url`` as an additional source to a notebook that already has one."""
        # The third "mdc-button__label" on the page is taken to be the
        # "add source" button, as in the original script.
        try:
            add_source = self._wait_for_element(
                driver,
                lambda: driver.find_elements(By.CLASS_NAME, "mdc-button__label")[2],
            )
            add_source.click()
        except WebDriverException as e:
            # Best effort: carry on and let the following steps fail loudly
            # if the source picker really did not open.
            print(f'Error: {e}')
        time.sleep(3)

        def locate_website_chip():
            groups = driver.find_element(By.CLASS_NAME, 'chip-groups').find_elements(
                By.CSS_SELECTOR, '.chip-group.ng-star-inserted'
            )
            return groups[1].find_element(By.TAG_NAME, 'mat-chip')

        try:
            self._wait_for_element(driver, locate_website_chip).click()
        except WebDriverException as e:
            print(f'Error: {e}')
            # A normal click failed; try clicking from JavaScript instead.
            try:
                driver.execute_script("arguments[0].click();", locate_website_chip())
                print("使用 JavaScript 成功點擊元素")
            except (WebDriverException, IndexError) as js_error:
                print(f'JavaScript 點擊錯誤: {js_error}')
        time.sleep(2)

        def locate_website_input():
            upload = driver.find_element(By.TAG_NAME, 'website-upload')
            form_field = upload.find_element(By.TAG_NAME, 'mat-form-field')
            return form_field.find_element(By.TAG_NAME, 'input')

        website_input = self._wait_for_element(driver, locate_website_input)
        self._type_and_submit(driver, website_input, url)

    def _read_reply(self, driver, index):
        """Wait for chat message ``index`` and return ``(part_texts, message_text)``.

        ``part_texts`` are the texts of the message's structural elements
        after the first one, which the original script also skipped.
        """
        def probe():
            messages = driver.find_element(By.TAG_NAME, 'chat-panel').find_elements(
                By.TAG_NAME, 'chat-message'
            )
            message = messages[index]
            parts = message.find_elements(By.TAG_NAME, self.ANSWER_PART_TAG)
            if parts[1].is_displayed() and parts[1].is_enabled():
                return message, parts
            return False

        message, parts = self._poll(driver, probe)
        return [part.text for part in parts[1:]], message.text

    def _request_article(self, driver):
        """Send the article prompt until a reply containing '##' arrives.

        Returns:
            ``(part_texts, message_text)`` of the accepted reply.

        Raises:
            RuntimeError: no reply contained a '##' heading within
                ``MAX_PROMPT_ROUNDS`` prompts.
        """
        def locate_prompt_box():
            omnibar = driver.find_element(By.TAG_NAME, 'chat-panel').find_element(
                By.TAG_NAME, 'omnibar'
            )
            return omnibar.find_element(By.TAG_NAME, 'query-box').find_element(
                By.TAG_NAME, 'textarea'
            )

        # Replies are read at chat-message indexes 1, 3, 5, ... - presumably
        # because each prompt adds a question/answer pair; verify against the page.
        reply_index = 1
        for _ in range(self.MAX_PROMPT_ROUNDS):
            prompt_input = self._wait_for_element(driver, locate_prompt_box)
            self._type_and_submit(driver, prompt_input, self.ARTICLE_PROMPT)

            print('等候答案')
            time.sleep(self.ANSWER_WAIT_SECONDS)

            part_texts, message_text = self._read_reply(driver, reply_index)
            if '##' in message_text:
                return part_texts, message_text
            reply_index += 2
        raise RuntimeError(
            f"no Markdown headings in reply after {self.MAX_PROMPT_ROUNDS} prompts"
        )

    @staticmethod
    def _compose_markdown(parts, reply_text):
        """Assemble the article's Markdown from the reply's text parts.

        Args:
            parts: Texts of the reply's structural elements, in page order.
            reply_text: Full text of the reply message; only used to detect
                whether it already contains ``###`` headings.

        Returns:
            The Markdown document as one string.

        Raises:
            ValueError: ``parts`` is empty.
        """
        if not parts:
            raise ValueError("NotebookLM reply contained no article text")

        if '###' in reply_text:
            # Headings are already present; only repair the '# # ' artefact.
            lines = [part.replace('# # ', '# ').strip() for part in parts]
        else:
            # No '###' headings: treat the first part as the title and the
            # odd-positioned parts as section headings. The last part is
            # dropped - presumably the closing sentence the prompt asks
            # NotebookLM to omit; confirm against real replies.
            last = len(parts) - 1
            lines = []
            for index, part in enumerate(parts):
                text = part.strip()
                if index == 0:
                    lines.append('# ' + text)
                elif index == last:
                    continue
                elif index % 2 == 1:
                    lines.append('## ' + text)
                else:
                    lines.append(text)

        if len(lines) > 1:
            return '\n'.join(lines)
        return lines[0].replace('# # ', '# ')

    # ------------------------------------------------------------------
    # Public API
    # ------------------------------------------------------------------
    def generate_article(self, urls):
        """Create a notebook from ``urls`` and save the generated article.

        Args:
            urls: Website URLs to add as notebook sources, in order.

        Returns:
            ``'article.md 生成成功'`` when the article was written,
            ``'存檔失敗'`` when writing the file failed, or ``False`` when
            ``urls`` is empty or any browser step raised.
        """
        if not urls:
            print("出現錯誤: ", "urls is empty")
            return False

        try:
            driver = self.get_webdriver()
            driver.get(self.NOTEBOOKLM_URL)
            time.sleep(3)

            self._create_notebook(driver)

            for index, url in enumerate(urls):
                time.sleep(5)
                if index == 0:
                    self._add_first_source(driver, url)
                else:
                    self._add_extra_source(driver, url)
                print(f'成功輸入網址: {url}')
                time.sleep(7)  # give NotebookLM time to ingest the source

            time.sleep(5)

            part_texts, reply_text = self._request_article(driver)
            content = self._compose_markdown(part_texts, reply_text)
            print(content)

            if self.save_article_as_md(content):
                return 'article.md 生成成功'
            return '存檔失敗'
        except Exception as e:
            # Top-level boundary: callers get False instead of an exception.
            print("出現錯誤: ", str(e))
            return False
        finally:
            # Always release the browser, on success and on failure alike.
            if self.driver:
                time.sleep(5)
                self._quit_driver()

    def save_article_as_md(self, content, filename="./article.md"):
        """Write ``content`` to ``filename`` as UTF-8 with a BOM.

        Returns:
            ``True`` when the file was written, ``False`` otherwise.
        """
        try:
            with open(filename, 'w', encoding='utf-8-sig') as file:
                file.write(content)
        except (OSError, TypeError, ValueError) as e:
            print(f"保存文章時發生錯誤: {str(e)}")
            return False
        print(f"文章已成功保存為 {filename}")
        return True
|
|
|
+
|
|
|
# Manual test area: runs only when this module is executed directly.
if __name__ == "__main__":
    # Chrome "User Data" directory and the profile folder inside it to reuse.
    chrome_user_data = 'C:/Users/wangy/AppData/Local/Google/Chrome/User Data'
    chrome_profile = 'Profile 20'

    # Example sources to feed into the notebook.
    source_urls = [
        "https://www.virtualhumans.org/",
        "https://en.wikipedia.org/wiki/Virtual_human",
        "https://www.d-id.com/resources/glossary/virtual-humans/",
    ]

    # Example usage: print whatever truthy status generate_article returns.
    generator = ArticleGenerator(chrome_user_data, chrome_profile)
    outcome = generator.generate_article(source_urls)
    if outcome:
        print(outcome)
|
|
|
+
|