"""Scrape Google result titles/URLs with Selenium and store them in MySQL.

Running this module opens a Chrome session, issues one hard-coded Google
query, inserts every result row into the ``kw_url_search_result`` table and
prints the collected list.
"""
import codecs
import datetime
import os
import random
import sys
import time
import traceback
import urllib.parse

import dataset
import docker
import html2text
import requests
from bs4 import BeautifulSoup
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.common.desired_capabilities import DesiredCapabilities
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait
# import rpyc
# import googlesearch


def process_one(driver):
    """Collect the results shown on the page currently loaded in *driver*.

    Every ``<a>`` directly under a ``div.yuRUbf`` element is treated as one
    result. The first line of the anchor's visible text is used as the
    title and its ``href`` attribute as the URL.

    Args:
        driver: Selenium WebDriver positioned on a Google results page.

    Returns:
        A list of ``{'title': ..., 'url': ...}`` dicts, in page order.
        Anchors that raise while being read are skipped (best effort).
    """
    lst = []
    # find_elements(By.XPATH, ...) replaces find_elements_by_xpath, which
    # was removed in Selenium 4.
    elmts = driver.find_elements(By.XPATH, "//div[@class='yuRUbf']/a")
    for elmt in elmts:
        try:
            href = elmt.get_attribute('href')
            txt = elmt.text.split('\n')
            print(txt[0])
            lst.append({'title': txt[0], 'url': href})
        except Exception:
            # Best effort: one unreadable element (e.g. gone stale) must not
            # abort the whole page. Ctrl-C / SystemExit still propagate.
            print('href2 exception')
            traceback.print_exc()
    return lst


def process_query(driver, qs, number_results=10, language_code='zh-TW', enable_next=True):
    """Run a Google search for *qs* and gather results across pages.

    Args:
        driver: Selenium WebDriver used to load and navigate the pages.
        qs: Raw search string; it is URL-quoted before being sent.
        number_results: Requested page size. ``number_results + 1`` is
            passed as the ``num`` URL parameter.
            NOTE(review): the reason for the +1 is not visible here.
        language_code: Value for the ``hl`` (interface language) parameter.
        enable_next: When true, keep clicking the "next page" link
            (``a#pnnext``) until it can no longer be found or clicked;
            when false, only the first page is processed.

    Returns:
        A list of ``{'title': ..., 'url': ...}`` dicts from every visited
        page, in visit order.
    """
    escaped_search_term = urllib.parse.quote(qs)
    googleurl = 'https://www.google.com/search?q={}&num={}&hl={}'.format(
        escaped_search_term, number_results + 1, language_code)
    print(googleurl)
    driver.get(googleurl)
    time.sleep(3)  # give the results page time to render

    totallst = []
    while True:
        totallst += process_one(driver)
        if not enable_next:
            break
        try:
            time.sleep(3)
            elmt = driver.find_element(By.XPATH, "//a[@id='pnnext']")
            # Hover first, then hover-and-click, as two separate action
            # chains (kept from the original sequence).
            webdriver.ActionChains(driver).move_to_element(elmt).perform()
            webdriver.ActionChains(driver).move_to_element(elmt).click().perform()
        except Exception:
            # No "next" link (last page) or the click failed: stop paging
            # and return what has been collected so far.
            traceback.print_exc()
            print('pnnext exception')
            break
        time.sleep(1.5)  # let the next page load before scraping it

    return totallst


result = []
driver = None


def restart_browser():
    """Start a fresh local Chrome session and return its WebDriver.

    The window is resized to 1400x1000.

    Returns:
        A new ``webdriver.Chrome`` instance. The caller is responsible for
        calling ``quit()`` on it.
    """
    # os.system('docker container restart p4444')
    # time.sleep(10)
    options = webdriver.ChromeOptions()
    # options.add_argument("--proxy-server=http://80.48.119.28:8080")
    # driver=webdriver.Chrome(executable_path='/Users/zooeytsai/Downloads/chromedriver',options=options)
    # Pass the options object directly; the desired_capabilities keyword is
    # no longer accepted by current Selenium 4 releases.
    driver = webdriver.Chrome(options=options)
    # driver = webdriver.Remote(
    #     command_executor='http://127.0.0.1:4444/wd/hub',
    #     desired_capabilities=options.to_capabilities())
    #     desired_capabilities=DesiredCapabilities.CHROME)
    driver.set_window_size(1400, 1000)
    return driver


# NOTE(review): database credentials are hard-coded in source; consider
# loading the connection URL from the environment or a secrets store.
db = dataset.connect('mysql://choozmo:pAssw0rd@db.ptt.cx:3306/seo?charset=utf8mb4')
table = db['kw_url_search_result']

driver = restart_browser()
try:
    lst = process_query(driver, '班尼斯 site:mobile01.com', number_results=50,
                        language_code='zh-TW', enable_next=False)
    for row in lst:
        table.insert(row)
    print(lst)
finally:
    # Always shut the browser down so no Chrome/chromedriver process is
    # left behind, even when the query or an insert fails.
    driver.quit()

# print(html2text.html2text("Zed's dead baby, Zed's dead."))