"""Scrape Google search result titles and URLs with a Selenium-driven Chrome."""

import codecs
import datetime
import os
import random
import sys
import time
import traceback
import urllib.parse

import docker
import requests
from bs4 import BeautifulSoup
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.common.desired_capabilities import DesiredCapabilities
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait

# import rpyc
# import googlesearch
# import dataset

# Default location of the chromedriver binary used by restart_browser().
CHROMEDRIVER_PATH = '/Users/zooeytsai/Downloads/chromedriver'


def process_one(driver):
    """Collect the title and URL of every result link on the current page.

    Looks up all anchors matching ``//div[@class='yuRUbf']/a`` (presumably
    Google's organic-result wrapper -- verify if the markup changes), prints
    each title, and returns the collected entries.

    Args:
        driver: A Selenium WebDriver already showing a results page.

    Returns:
        A list of ``{'title': str, 'url': str}`` dicts, one per anchor that
        could be read. Anchors that raise while being read are skipped.
    """
    entries = []
    # find_elements(By.XPATH, ...) works on Selenium 3 and 4; the older
    # find_elements_by_xpath helper was removed in Selenium 4.3.
    anchors = driver.find_elements(By.XPATH, "//div[@class='yuRUbf']/a")
    for anchor in anchors:
        try:
            href = anchor.get_attribute('href')
            # The element text may span several lines; the first is the title.
            title = anchor.text.split('\n')[0]
            print(title)
            entries.append({'title': title, 'url': href})
        except Exception:
            # Best effort: one unreadable element (e.g. gone stale) must not
            # abort the whole page. Ctrl-C / SystemExit still propagate.
            print('href2 exception')
            traceback.print_exc()
    return entries


def process_query(driver, qs, number_results=10, language_code='en', enable_next=True):
    """Run a Google search and gather result links, optionally across pages.

    Args:
        driver: A Selenium WebDriver used to load and navigate the pages.
        qs: The raw search string; it is URL-quoted before use.
        number_results: Requested page size; the URL asks for
            ``number_results + 1`` via the ``num`` parameter.
        language_code: Value for the ``hl`` URL parameter.
        enable_next: When true, keep clicking the ``pnnext`` link and
            scraping each following page until the link cannot be found or
            clicked. When false, only the first page is scraped.

    Returns:
        A list of ``{'title': str, 'url': str}`` dicts from all visited pages.
    """
    escaped_search_term = urllib.parse.quote(qs)
    googleurl = 'https://www.google.com/search?q={}&num={}&hl={}'.format(
        escaped_search_term, number_results + 1, language_code)
    print(googleurl)
    driver.get(googleurl)
    time.sleep(3)  # fixed pause to let the results page render

    collected = []
    while True:
        collected.extend(process_one(driver))
        if not enable_next:
            break
        try:
            time.sleep(3)
            next_link = driver.find_element(By.XPATH, "//a[@id='pnnext']")
            # Hover first, then hover-and-click, as two separate action chains.
            webdriver.ActionChains(driver).move_to_element(next_link).perform()
            webdriver.ActionChains(driver).move_to_element(next_link).click().perform()
        except Exception:
            # A missing "next" link is the normal end of pagination; any other
            # failure also stops paging and returns what was gathered so far.
            traceback.print_exc()
            print('pnnext exception')
            break
        time.sleep(1.5)  # give the next page time to load before scraping
    return collected


# Module-level state; not used by the functions above (callers may rely on it).
result = []
driver = None


def restart_browser(executable_path=CHROMEDRIVER_PATH):
    """Start a new local Chrome session and return its WebDriver.

    Args:
        executable_path: Path to the chromedriver binary. Defaults to
            ``CHROMEDRIVER_PATH``.

    Returns:
        A Chrome WebDriver with its window sized to 1400x1000.
    """
    # os.system('docker container restart p4444')
    # time.sleep(10)
    options = webdriver.ChromeOptions()
    # options.add_argument("--proxy-server=http://80.48.119.28:8080")

    # NOTE(review): `executable_path=` is deprecated in Selenium 4 in favour of
    # a Service object; kept as-is for the Selenium version this file targets.
    browser = webdriver.Chrome(executable_path=executable_path, options=options)
    # driver=webdriver.Chrome(desired_capabilities=options.to_capabilities())
    # driver = webdriver.Remote(
    #     command_executor='http://127.0.0.1:4444/wd/hub',
    #     desired_capabilities=options.to_capabilities())
    #     desired_capabilities=DesiredCapabilities.CHROME)
    browser.set_window_size(1400, 1000)
    return browser