# test.py
import os
import sys
import time
import urllib
import urllib.parse
from typing import List

import undetected_chromedriver as uc
from fastapi import FastAPI, Form, Request
from fastapi.responses import FileResponse, HTMLResponse
from fastapi.templating import Jinja2Templates
from pydantic import BaseModel
from pyvirtualdisplay import Display
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.by import By

import urlToarticle
  16. app = FastAPI()
  17. templates = Jinja2Templates(directory="templates")
  18. driver = None
  19. driver_path = '/usr/local/bin/chromedriver'
  20. user_data_dir = '/home/ling/.config/google-chrome'
  21. profile_directory = 'Default'
  22. def re_get_webdriver():
  23. global port
  24. global driver
  25. global portnum
  26. global is_docker
  27. result = []
  28. if driver is not None:
  29. print('closing....')
  30. driver.quit()
  31. print('quit....')
  32. driver = None
  33. try:
  34. display = Display(visible=0, size=(1920, 1080))
  35. display.start()
  36. service = Service(driver_path)
  37. options = uc.ChromeOptions()
  38. #options.add_argument("--window-size=200,100") # 縮小視窗
  39. #options.add_argument("--window-position=-32000,-32000") # 移到螢幕外
  40. options.add_argument("--no-sandbox")
  41. options.add_argument("--disable-dev-shm-usage")
  42. options.add_argument("--disable-blink-features=AutomationControlled")
  43. driver = uc.Chrome(options=options, version_main=132)
  44. driver.execute_script("Object.defineProperty(navigator, 'webdriver', {get: () => undefined})")
  45. time.sleep(3)
  46. print(driver.current_url)
  47. except:
  48. driver = None
  49. return driver
  50. return driver
  51. def get_resource(kw):
  52. max_attempts = 2
  53. attempts = 0
  54. while attempts<max_attempts:
  55. driver = re_get_webdriver()
  56. print('re_get_webdriver')
  57. if driver is not None:
  58. break
  59. time.sleep(3)
  60. attempts+=1
  61. try:
  62. googleurl = 'https://www.google.com/search?q={}&num={}&hl={}&gl=tw'.format(urllib.parse.quote(kw), 100, 'zh-TW')
  63. # googleurl = 'https://www.google.com/search?q={}&num={}&hl={}&gl=tw&tbm=vid&tbs=vd:m'.format(urllib.parse.quote(kw), 100, 'zh-TW')
  64. # googleurl = f'https://www.google.co.jp/search?q={kw}&sca_esv=741dc4f98c90c9c4&source=hp&ei=djmOZ8inMYWk2roPk_yMiA4&iflsig=AL9hbdgAAAAAZ45HhiuBAUgi3Vf3Qd5FTyfcyUOySOxk&ved=0ahUKEwjIutTinoSLAxUFklYBHRM-A-EQ4dUDCA8&uact=5&oq=junho&gs_lp=Egdnd3Mtd2l6IgphbmdlbG8ga29vMgUQLhiABDIEEAAYHjIEEAAYHjIEEAAYHjIEEAAYHjIEEAAYHjIEEAAYHjIEEAAYHjIGEAAYChgeSL0YUABYqRZwAXgAkAEAmAGwAaABjQyqAQQwLjExuAEDyAEA-AEBmAIMoALYDMICCxAuGIAEGNEDGMcBwgIFEAAYgATCAgoQLhiABBhDGIoFwgILEC4YgAQYxwEYrwHCAgcQABiABBgKwgIHEC4YgAQYCsICDRAuGIAEGMcBGAoYrwGYAwCSBwQxLjExoAfBqQE&sclient=gws-wiz'
  65. driver.get(googleurl)
  66. time.sleep(6)
  67. print(driver.current_url)
  68. elmts = driver.find_elements(By.XPATH, "//div[@class='yuRUbf']//a")
  69. numresults = len(elmts)
  70. print('搜尋結果數量', numresults)
  71. if numresults == 0:
  72. print(driver.current_url)
  73. print(driver.title)
  74. sys.exit()
  75. resources_list = []
  76. for elmt in elmts[0:11]:
  77. href = elmt.get_attribute('href')
  78. resources_list.append(href)
  79. print(resources_list)
  80. return resources_list
  81. except Exception as e:
  82. print('exception')
  83. return None
  84. driver.quit()
  85. urls = get_resource('書房 設計 北歐') # 取得搜尋結果第一頁網址來源
  86. #urls = ['https://www.100.com.tw/article/4359', 'https://yes-99.com/news-info.asp?id=278']
  87. #article_generator = urlToarticle.ArticleGenerator(user_data_dir, profile_directory)
  88. #is_succesed = article_generator.generate_article(urls)
  89. #if is_succesed:
  90. # print(is_succesed)