Jared 3 lat temu
rodzic
commit
788b37e036
1 zmienionych plików z 85 dodań i 0 usunięć
  1. 85 0
      choozmo/gsearch_general.py

+ 85 - 0
choozmo/gsearch_general.py

@@ -0,0 +1,85 @@
+import traceback
+from selenium import webdriver
+from selenium.webdriver.common.desired_capabilities import DesiredCapabilities
+import time
+import os
+import datetime
+import urllib.parse
+from selenium.webdriver.support.ui import WebDriverWait
+from selenium.webdriver.common.by import By
+from selenium.webdriver.support import expected_conditions as EC
+import codecs
+import random
+from bs4 import BeautifulSoup
+import requests
+import time
+import rpyc
+import sys
+import docker
+import  googlesearch
+import codecs
+import sys
+import time
+import dataset
+import os
+
+def process_one(driver):
+    lst=[]
+    elmts=driver.find_elements_by_xpath("//div[@class='yuRUbf']/a")
+    for elmt in elmts:
+        try:
+            href=elmt.get_attribute('href')
+#            print(href)
+            txt=elmt.text.split('\n')
+            print(txt[0])
+            lst.append({'title':txt[0],'url':href})
+        except:
+            print('href2 exception')
+            traceback.print_exc()
+    return lst
+
+def process_query(driver,qs,number_results=10,language_code='en',enable_next=True):
+    escaped_search_term=urllib.parse.quote(qs)
+    googleurl = 'https://www.google.com/search?q={}&num={}&hl={}'.format(escaped_search_term, number_results+1,language_code)
+    print(googleurl)
+    driver.get(googleurl)
+    time.sleep(3)
+    totallst=[]
+    while True:
+        lst=process_one(driver)
+        totallst+=lst
+        try:
+            if enable_next:
+                elmt=driver.find_element_by_xpath("//a[@id='pnnext']")
+                webdriver.ActionChains(driver).move_to_element(elmt).perform()
+                webdriver.ActionChains(driver).move_to_element(elmt).click().perform()
+            else:
+                break
+        except:
+            traceback.print_exc()
+            print('pnnext exception')
+            break
+        time.sleep(1.5)
+    return totallst
+
+
+result=[]  # module-level list; not referenced by the functions visible in this chunk
+driver=None  # module-level placeholder; restart_browser() returns a new driver but does not assign this name
+
+def restart_browser():
+#    os.system('docker container restart p4444')
+#    time.sleep(10)
+
+    options = webdriver.ChromeOptions()
+#    options.add_argument("--proxy-server=http://80.48.119.28:8080")
+    driver=webdriver.Chrome(options=options)
+
+#    driver=webdriver.Chrome(desired_capabilities=options.to_capabilities())
+    #driver = webdriver.Remote(
+    #    command_executor='http://127.0.0.1:4444/wd/hub',
+    #desired_capabilities=options.to_capabilities())
+#    desired_capabilities=DesiredCapabilities.CHROME)
+    driver.set_window_size(1400,1000)
+    return driver
+
+