import os
import sys
import time
import urllib
import urllib.parse

import undetected_chromedriver as uc
from selenium.webdriver.common.by import By

# Module-wide handle to the current Chrome session; None when no browser is open.
driver = None


def _close_webdriver():
    """Quit the module-level ``driver`` (if any) and reset it to ``None``.

    Cleanup is best-effort: an error raised by ``quit()`` is printed rather
    than propagated, so a stale session cannot block the caller.
    """
    global driver
    if driver is None:
        return
    try:
        driver.quit()
    except Exception as exc:
        print('quit failed', repr(exc))
    finally:
        driver = None


def re_get_webdriver():
    """Close any existing browser and start a fresh Chrome session.

    The new session is stored in the module-level ``driver`` and its cookies
    are cleared before it is handed back.

    Returns:
        The new ``uc.Chrome`` instance, or ``None`` if the browser could not
        be started or its cookies could not be cleared.
    """
    global driver
    if driver is not None:
        print('closing....')
        _close_webdriver()
        print('quit....')
    try:
        options = uc.ChromeOptions()
        # A mobile user-agent override was tried here previously and is disabled.
        options.add_argument("--window-size=200,100")  # shrink the window
        options.add_argument("--window-position=-32000,-32000")  # move it off-screen
        driver = uc.Chrome(options=options)
        driver.delete_all_cookies()
    except Exception as exc:
        print('webdriver start failed', repr(exc))
        # If Chrome started but a later step failed, do not leak the process.
        _close_webdriver()
        return None
    return driver


def get_resource(kw):
    """Search Google for *kw* and return the result link URLs.

    Retries every 3 seconds, without limit, until a browser session can be
    started. The browser is always closed before this function returns or
    exits.

    Args:
        kw: Search keyword; it is URL-quoted before being placed in the query.

    Returns:
        A list of ``href`` values of the matched result links, or ``None`` if
        an exception occurred while loading or reading the page.

    Raises:
        SystemExit: If the page contains no matching result links.
    """
    while True:
        browser = re_get_webdriver()
        print('re_get_webdriver')
        if browser is not None:
            break
        time.sleep(3)
    try:
        # Ask for up to 100 results, zh-TW interface, Taiwan region.
        # Alternative (video search): append '&tbm=vid&tbs=vd:m' to the query.
        googleurl = 'https://www.google.com/search?q={}&num={}&hl={}&gl=tw'.format(
            urllib.parse.quote(kw), 100, 'zh-TW')
        browser.get(googleurl)
        time.sleep(6)  # fixed pause to give the page time to load
        print(browser.current_url)
        # NOTE(review): 'yuRUbf' is presumably the wrapper class Google puts
        # around organic result links - verify, since such names can change.
        elmts = browser.find_elements(By.XPATH, "//div[@class='yuRUbf']//a")
        numresults = len(elmts)
        print('搜尋結果數量', numresults)
        if numresults == 0:
            # Print where we ended up (URL and title) to help diagnose, then exit.
            print(browser.current_url)
            print(browser.title)
            sys.exit()
        resources_list = [elmt.get_attribute('href') for elmt in elmts]
        print(resources_list)
        return resources_list
    except Exception as exc:
        print('exception', repr(exc))
        return None
    finally:
        # Runs on return, on error and on sys.exit(), so Chrome is never left open.
        _close_webdriver()


get_resource('')  # Fetch the source URLs from the first page of search results