"""Record Google result rankings for a fixed set of search terms.

For every search term the script opens a fresh headless Chrome session,
loads the Google results page, saves a screenshot, and writes the result
titles, URLs and ranks to a spreadsheet.  After the scrape it issues a few
``docker`` commands (see the bottom of the file).
"""
import argparse
import codecs
import datetime
import logging
import os
import random
import socket
import sys
import time
import traceback
import urllib
import urllib.parse  # `import urllib` alone does not guarantee `urllib.parse`
from logging.handlers import SysLogHandler

import dataset
import pandas as pd
import pymysql
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.by import By

pymysql.install_as_MySQLdb()

# Raw strings keep the exact original path text while avoiding the invalid
# ``\/`` escape sequence warning.
CHROMEDRIVER_PATH = r'C:\/Users\/s1301\/Downloads\/chromedriver_107\/chromedriver'
OUTPUT_DIR = r'C:\/Users\/s1301\/Pictures\/Saved Pictures\/'

USER_AGENT = (
    'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 '
    '(KHTML, like Gecko) Chrome/87.0.4280.88 Safari/537.36'
)

DEFAULT_TERMS = ('雙響泡', '雙響砲', '双響泡')
DEFAULT_DATE = '1214'

# Most recently started browser; rebound by restart_browser().
driver = None


def restart_browser():
    """Start a new headless, incognito Chrome session and return it.

    The new session is also stored in the module-level ``driver`` global.
    Cookies are cleared and the window is made very tall (1400x20000) so a
    screenshot captures the whole results page.

    Returns:
        The newly created ``webdriver.Chrome`` instance.
    """
    global driver
    service = Service(CHROMEDRIVER_PATH)
    options = webdriver.ChromeOptions()
    options.add_argument('--headless')
    # options.add_argument('--remote-debugging-port=9222')
    # options.add_experimental_option("debuggerAddress", "192.168.192.45:9922")
    options.add_argument("--user-agent=" + USER_AGENT)
    options.add_argument("--incognito")
    # options.add_argument('--proxy-server=socks5://172.104.93.163:41800')

    driver = webdriver.Chrome(options=options, service=service)
    driver.delete_all_cookies()
    driver.set_window_size(1400, 20000)
    return driver


def _collect_results(browser, term):
    """Return a column dict of the result links found on the current page.

    Each link contributes one row: the search term, the link text, the link
    URL and a 1-based rank (as a string).  A link whose attributes cannot be
    read is reported and skipped without consuming a rank.
    """
    columns = {'搜尋詞': [], '結果標題': [], '結果網址': [], '結果名次': []}
    # NOTE(review): 'yuRUbf' is presumably Google's wrapper class for an
    # organic result link; it may change without notice.
    links = browser.find_elements(By.XPATH, "//div[@class='yuRUbf']/a")
    rank = 1
    for link in links:
        try:
            # Read everything that can fail *before* appending, so a failure
            # never leaves the four columns with different lengths.
            href = link.get_attribute('href')
            title = link.text
        except Exception:
            print('href2 exception')
            traceback.print_exc()
            continue
        columns['搜尋詞'].append(term)
        columns['結果標題'].append(title)
        columns['結果網址'].append(href)
        columns['結果名次'].append(str(rank))
        rank += 1
    return columns


def process_one(terms=DEFAULT_TERMS, date=DEFAULT_DATE):
    """Scrape Google rankings for each term and save them to disk.

    For every term a screenshot (``<date><term>.png``) and a spreadsheet
    (``<date><term>.xls``) are written to ``OUTPUT_DIR``.  If a search yields
    no result links the whole process exits via ``sys.exit()``.

    Args:
        terms: Iterable of search terms; defaults to the built-in list.
        date: Tag prepended to every output file name.
    """
    for term in terms:
        browser = restart_browser()
        try:
            escaped_search_term = urllib.parse.quote(term)
            googleurl = 'https://www.google.com/search?q={}&num={}&hl={}'.format(
                escaped_search_term, 100, 'zh-TW')
            browser.get(googleurl)
            time.sleep(6)  # give the page time to finish rendering
            print(browser.current_url)
            browser.save_screenshot(OUTPUT_DIR + date + term + '.png')

            columns = _collect_results(browser, term)
            if not columns['結果標題']:
                print('None')
                sys.exit()

            # NOTE(review): writing ".xls" depends on an xlwt-capable pandas;
            # newer pandas releases may require ".xlsx" instead - confirm.
            pd.DataFrame(columns).to_excel(OUTPUT_DIR + date + term + ".xls")
        finally:
            # Always close the browser, even on errors or sys.exit().
            browser.quit()

        print('等待')
        # Randomised pause between searches.
        time.sleep(random.randint(100, 120))


process_one()

# NOTE(review): this stops/removes container "tiny8" but starts "tiny1" -
# confirm the names are intentional.
os.system('docker container stop tiny8')
time.sleep(3)
os.system('docker container rm tiny8')
time.sleep(3)
os.system('docker run -d -p 9924:9222 --rm --shm-size="900m" --name tiny1 chromedp/headless-shell')
time.sleep(3)