|
@@ -0,0 +1,74 @@
|
|
|
+import undetected_chromedriver as uc
|
|
|
+import time
|
|
|
+import os
|
|
|
+import urllib
|
|
|
+from selenium.webdriver.common.by import By
|
|
|
+import sys
|
|
|
+
|
|
|
# Module-wide handle to the browser session most recently created by
# re_get_webdriver(); None when no session is open.
driver = None


def re_get_webdriver():
    """Start a fresh undetected-chromedriver session, replacing any old one.

    Any session currently stored in the module-level ``driver`` is quit
    first. The new browser is opened with a tiny window positioned far
    off-screen, and its cookies are cleared before it is handed out.

    Returns:
        The new driver (also stored in the module-level ``driver``), or
        ``None`` if the browser could not be started. Callers are expected
        to retry on ``None``.
    """
    global driver

    if driver is not None:
        print('closing....')
        try:
            driver.quit()
        except Exception as exc:
            # A dead or already-closed session must not prevent us from
            # starting a replacement.
            print('quit failed:', exc)
        else:
            print('quit....')
        driver = None

    new_driver = None
    try:
        options = uc.ChromeOptions()
        options.add_argument("--window-size=200,100")  # shrink the window
        options.add_argument("--window-position=-32000,-32000")  # move it off-screen
        new_driver = uc.Chrome(options=options)
        new_driver.delete_all_cookies()
    except Exception as exc:
        # Report the reason instead of failing silently; KeyboardInterrupt and
        # SystemExit are intentionally allowed to propagate.
        print('failed to start webdriver:', exc)
        if new_driver is not None:
            # The browser started but setup failed: close it so the process
            # is not leaked.
            try:
                new_driver.quit()
            except Exception as quit_exc:
                print('quit failed:', quit_exc)
        return None

    driver = new_driver
    return driver
|
|
|
+
|
|
|
+
|
|
|
def get_resource(kw):
    """Run a Google search for ``kw`` and return the result links.

    A new browser session is obtained from ``re_get_webdriver()``, retrying
    every 3 seconds until one is available (the retry is unbounded). The
    search requests up to 100 results with ``hl=zh-TW`` and ``gl=tw``, waits
    6 seconds for the page to load, and collects the ``href`` of every anchor
    found under ``div.yuRUbf``.

    The browser is always closed before this function returns or exits.

    Args:
        kw: Search keyword; it is URL-quoted before being put in the query.

    Returns:
        A list of ``href`` values, or ``None`` if an exception occurred while
        loading or scraping the page.

    Raises:
        SystemExit: If the page contains no matching result anchors (the
            current URL and page title are printed first).
    """
    # Declared global so the shared handle can be cleared after quitting;
    # otherwise the next re_get_webdriver() call would quit it a second time.
    global driver

    # A bare `import urllib` does not guarantee that the `parse` submodule is
    # loaded, so import the function explicitly.
    from urllib.parse import quote

    while True:
        driver = re_get_webdriver()
        print('re_get_webdriver')
        if driver is not None:
            break
        time.sleep(3)

    try:
        googleurl = 'https://www.google.com/search?q={}&num={}&hl={}&gl=tw'.format(
            quote(kw), 100, 'zh-TW'
        )
        driver.get(googleurl)

        # Fixed wait for the results page to finish loading.
        time.sleep(6)
        print(driver.current_url)
        elmts = driver.find_elements(By.XPATH, "//div[@class='yuRUbf']//a")
        numresults = len(elmts)
        print('搜尋結果數量', numresults)
        if numresults == 0:
            # Print where we ended up to help diagnose the empty result,
            # then stop the program.
            print(driver.current_url)
            print(driver.title)
            sys.exit()

        resources_list = [elmt.get_attribute('href') for elmt in elmts]
        print(resources_list)
        return resources_list

    except Exception as exc:
        # SystemExit is not an Exception subclass, so the sys.exit() above
        # still terminates the program.
        print('exception', exc)
        return None

    finally:
        # Runs on every exit path (return, error, sys.exit) so the browser
        # process is never left behind.
        try:
            driver.quit()
        except Exception as exc:
            print('quit failed:', exc)
        driver = None


# Fetch the source URLs from the first page of search results.
# NOTE(review): this runs at import time and passes an empty keyword --
# confirm that is intended.
get_resource('')
|