# zooey / article_generate
import os
import sys
import time
import urllib
import urllib.parse

import undetected_chromedriver as uc
from selenium.webdriver.common.by import By

driver = None  # Most recently created Chrome driver; managed by re_get_webdriver().


def re_get_webdriver():
    """Discard the current Chrome driver, if any, and start a fresh one.

    The new instance is stored in the module-level ``driver`` and is also
    returned to the caller.

    Returns:
        The newly created ``uc.Chrome`` instance, or ``None`` when the browser
        could not be created or initialised (the module-level ``driver`` is
        ``None`` in that case as well).
    """
    global driver

    if driver is not None:
        print('closing....')
        stale, driver = driver, None
        try:
            stale.quit()
        except Exception as exc:
            # A browser that already died must not prevent starting a new one.
            print('quit failed:', exc)
        else:
            print('quit....')

    new_driver = None
    try:
        options = uc.ChromeOptions()
        options.add_argument("--window-size=200,100")  # shrink the window
        options.add_argument("--window-position=-32000,-32000")  # move it off-screen
        new_driver = uc.Chrome(options=options)
        new_driver.delete_all_cookies()
    except Exception as exc:
        # ``Exception`` rather than a bare ``except`` so that Ctrl-C and
        # ``sys.exit()`` still propagate out of the caller's retry loop.
        print('failed to start Chrome driver:', exc)
        if new_driver is not None:
            # The browser started but initialisation failed: close it so the
            # process is not left running.
            try:
                new_driver.quit()
            except Exception as quit_exc:
                print('quit failed:', quit_exc)
        return None

    driver = new_driver
    return driver


def get_resource(kw):
    """Return the result links of a Google search for ``kw``.

    Starts a fresh Chrome driver via ``re_get_webdriver()`` (retrying every
    3 seconds until one is available), opens the Google results page for
    ``kw`` (``num=100``, ``hl=zh-TW``, ``gl=tw``) and collects the ``href`` of
    every anchor located under a ``div`` whose class is ``yuRUbf``. The
    browser is always closed before the function returns or exits.

    Args:
        kw: Search keyword; it is URL-quoted before being placed in the query.

    Returns:
        A list with the ``href`` attribute of each matching anchor, or
        ``None`` if an exception occurred while loading or scraping the page.

    Raises:
        SystemExit: If no matching result link is found on the page.
    """
    # Shares the module-level handle with re_get_webdriver() so it can be
    # cleared once the browser has been closed below.
    global driver

    while True:
        driver = re_get_webdriver()
        print('re_get_webdriver')
        if driver is not None:
            break
        time.sleep(3)

    try:
        googleurl = 'https://www.google.com/search?q={}&num={}&hl={}&gl=tw'.format(
            urllib.parse.quote(kw), 100, 'zh-TW')
        driver.get(googleurl)

        # Fixed delay before reading the page — presumably to let the results
        # finish rendering.
        time.sleep(6)
        print(driver.current_url)
        elmts = driver.find_elements(By.XPATH, "//div[@class='yuRUbf']//a")
        print('搜尋結果數量', len(elmts))
        if not elmts:
            # Show where the browser ended up to help diagnose the empty result.
            print(driver.current_url)
            print(driver.title)
            sys.exit()

        resources_list = [elmt.get_attribute('href') for elmt in elmts]
        print(resources_list)
        return resources_list

    except Exception as e:
        print('exception', e)
        return None

    finally:
        # Runs on success, on failure and on sys.exit(): the original trailing
        # driver.quit() was unreachable, so the browser was never closed.
        try:
            driver.quit()
        except Exception as e:
            print('quit failed:', e)
        # Clear the shared handle so re_get_webdriver() does not try to quit
        # the already-closed browser on the next call.
        driver = None

# NOTE(review): this call sits at module top level, so it runs whenever the file
# is executed or imported; the keyword is an empty string — presumably a
# placeholder to be replaced with a real search term.
get_resource('') # Fetch the source URLs from the first page of search results