1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374 |
- import undetected_chromedriver as uc
- import time
- import os
- import urllib
- from selenium.webdriver.common.by import By
- import sys
# Module-wide handle to the current browser session. re_get_webdriver()
# rebinds it through ``global driver``; None means no session is open.
driver = None
def re_get_webdriver():
    """Discard any existing browser session and start a fresh one.

    The module-level ``driver`` is quit (if set) and replaced by a new
    ``uc.Chrome`` instance whose window is shrunk and positioned off-screen,
    and whose cookies are cleared.

    Returns:
        The new driver, or ``None`` if it could not be started. On failure
        the module-level ``driver`` is left as ``None``.
    """
    global driver

    if driver is not None:
        print('closing....')
        try:
            driver.quit()
        except Exception as exc:
            # A broken old session must not prevent starting a new one.
            print('quit failed:', repr(exc))
        print('quit....')
        driver = None

    new_driver = None
    try:
        options = uc.ChromeOptions()
        options.add_argument("--window-size=200,100")  # shrink the window
        options.add_argument("--window-position=-32000,-32000")  # move it off-screen
        new_driver = uc.Chrome(options=options)
        new_driver.delete_all_cookies()
    except Exception as exc:
        # Catch Exception rather than using a bare except so that
        # KeyboardInterrupt / SystemExit still propagate to the caller.
        print('failed to start webdriver:', repr(exc))
        if new_driver is not None:
            # Chrome started but a later step failed: do not leak the process.
            try:
                new_driver.quit()
            except Exception:
                pass
        return None

    driver = new_driver
    return driver
def get_resource(kw):
    """Search Google for *kw* and return the result links of the first page.

    A fresh browser is obtained from ``re_get_webdriver()``, retrying every
    3 seconds until one is available. The browser is always closed before
    this function finishes, whichever way it exits.

    Args:
        kw: Search keyword; it is URL-quoted before being put in the query.

    Returns:
        A list with the ``href`` of every anchor matched by the result XPath,
        or ``None`` if an exception occurred while loading or reading the
        page.

    Raises:
        SystemExit: If the page contains no matching result anchors.
    """
    global driver

    # Imported here because a bare ``import urllib`` does not guarantee that
    # the ``urllib.parse`` submodule has been loaded.
    from urllib.parse import quote

    while True:
        driver = re_get_webdriver()
        print('re_get_webdriver')
        if driver is not None:
            break
        time.sleep(3)

    try:
        googleurl = 'https://www.google.com/search?q={}&num={}&hl={}&gl=tw'.format(
            quote(kw), 100, 'zh-TW')
        driver.get(googleurl)
        time.sleep(6)  # give the results page time to render
        print(driver.current_url)

        elmts = driver.find_elements(By.XPATH, "//div[@class='yuRUbf']//a")
        numresults = len(elmts)
        print('搜尋結果數量', numresults)
        if numresults == 0:
            # Print where the browser ended up before giving up.
            print(driver.current_url)
            print(driver.title)
            sys.exit()

        resources_list = [elmt.get_attribute('href') for elmt in elmts]
        print(resources_list)
        return resources_list
    except Exception as e:
        print('exception', repr(e))
        return None
    finally:
        # Runs on return, on exception and on sys.exit(), so the browser
        # process is never left behind.
        if driver is not None:
            try:
                driver.quit()
            except Exception as exc:
                print('quit failed:', repr(exc))
            # Reset so a later re_get_webdriver() does not quit it again.
            driver = None
# Fetch the source URLs from the first page of search results.
get_resource('')
|