zooey 8 mesi fa
commit
dd1a8da502
1 ha cambiato i file con 74 aggiunte e 0 eliminazioni
  1. 74 0
      resources_notebook.py

+ 74 - 0
resources_notebook.py

@@ -0,0 +1,74 @@
+import undetected_chromedriver as uc
+import time
+import os
+import urllib
+from selenium.webdriver.common.by import By
+import sys
+
+# Module-level WebDriver handle; re_get_webdriver() quits and replaces it on each call.
+driver = None
+def re_get_webdriver():
+    global port
+    global driver
+    global portnum
+    global is_docker
+    result = []
+    if driver is not None:
+        print('closing....')
+        driver.quit()
+        print('quit....')
+        driver = None
+    try:
+        options = uc.ChromeOptions()
+        # options.add_argument("--user-agent=" + "Mozilla/5.0 (Linux; Android 4.2.1; en-us; Nexus 5 Build/JOP40D) AppleWebKit/535.19 (KHTML, like Gecko) Chrome/18.0.1025.166 Mobile Safari/535.19")
+
+        options.add_argument("--window-size=200,100")  # 縮小視窗
+        options.add_argument("--window-position=-32000,-32000")  # 移到螢幕外
+        # for window in gw.getWindowsWithTitle("Chrome"):
+        #     window.minimize()
+        driver = uc.Chrome(options=options)
+        driver.delete_all_cookies()
+    except:
+        driver = None
+        return None
+
+    return driver
+
+
+def get_resource(kw):
+    while True:
+        driver = re_get_webdriver()
+        print('re_get_webdriver')
+        if driver is not None:
+            break
+        time.sleep(3)
+    try:
+        googleurl = 'https://www.google.com/search?q={}&num={}&hl={}&gl=tw'.format(urllib.parse.quote(kw), 100, 'zh-TW')
+        # googleurl = 'https://www.google.com/search?q={}&num={}&hl={}&gl=tw&tbm=vid&tbs=vd:m'.format(urllib.parse.quote(kw), 100, 'zh-TW')
+        # googleurl = f'https://www.google.co.jp/search?q={kw}&sca_esv=741dc4f98c90c9c4&source=hp&ei=djmOZ8inMYWk2roPk_yMiA4&iflsig=AL9hbdgAAAAAZ45HhiuBAUgi3Vf3Qd5FTyfcyUOySOxk&ved=0ahUKEwjIutTinoSLAxUFklYBHRM-A-EQ4dUDCA8&uact=5&oq=junho&gs_lp=Egdnd3Mtd2l6IgphbmdlbG8ga29vMgUQLhiABDIEEAAYHjIEEAAYHjIEEAAYHjIEEAAYHjIEEAAYHjIEEAAYHjIEEAAYHjIGEAAYChgeSL0YUABYqRZwAXgAkAEAmAGwAaABjQyqAQQwLjExuAEDyAEA-AEBmAIMoALYDMICCxAuGIAEGNEDGMcBwgIFEAAYgATCAgoQLhiABBhDGIoFwgILEC4YgAQYxwEYrwHCAgcQABiABBgKwgIHEC4YgAQYCsICDRAuGIAEGMcBGAoYrwGYAwCSBwQxLjExoAfBqQE&sclient=gws-wiz'
+        driver.get(googleurl)
+
+        time.sleep(6)
+        print(driver.current_url)
+        elmts = driver.find_elements(By.XPATH, "//div[@class='yuRUbf']//a")
+        numresults = len(elmts)
+        print('搜尋結果數量', numresults)
+        if numresults == 0:
+            print(driver.current_url)
+            print(driver.title)
+            sys.exit()
+
+        resources_list = []
+
+        for elmt in elmts:
+            href = elmt.get_attribute('href')
+            resources_list.append(href)
+        print(resources_list)
+        return resources_list
+
+    except Exception as e:
+        print('exception')
+        return None
+
+    driver.quit()
+
+get_resource('') # 取得搜尋結果第一頁網址來源