"""Search Google for a randomly chosen keyword through a Dockerised Selenium
Chrome, click a result on the target domain, and record its rank in the
``rank_detection`` table.

Running this module has side effects: it connects to the database, restarts
the ``p4444`` Docker container and performs one search.
"""
import codecs
import json
import os
import random
import sys
import time
import traceback
import urllib

import dataset
import redis
import requests
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.common.desired_capabilities import DesiredCapabilities
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait

# Most recently created remote WebDriver session, or None when there is none.
driver = None

# NOTE(review): credentials are embedded in this URL; consider loading them
# from the environment or a config file that is not checked in.
db = dataset.connect('mysql://choozmo:pAssw0rd@db.ptt.cx:3306/seo?charset=utf8mb4')


def re_get_webdriver():
    """Replace the module-level WebDriver with a fresh remote Chrome session.

    Any session currently held in the global ``driver`` is quit first. The
    new session is requested from the Selenium server listening on
    ``127.0.0.1`` at the module-level ``portnum``.

    Returns:
        The new driver, or ``None`` if the session could not be created (the
        traceback is printed and the global ``driver`` is left as ``None``).
    """
    global driver

    if driver is not None:
        print('closing....')
        try:
            driver.quit()
        except Exception:
            # A stale or already-closed session must not prevent us from
            # opening a new one.
            traceback.print_exc()
        print('quit....')
        driver = None

    try:
        options = webdriver.ChromeOptions()
        options.add_argument("--no-sandbox")
        options.add_argument("--headless")
        options.add_argument("--incognito")
        driver = webdriver.Remote(
            command_executor='http://127.0.0.1:' + str(portnum) + '/wd/hub',
            options=options)
    except Exception:
        traceback.print_exc()
        driver = None
        return None
    return driver


def _wait_for_webdriver():
    """Call re_get_webdriver() every 3 seconds until it returns a session."""
    while True:
        session = re_get_webdriver()
        if session is not None:
            return session
        time.sleep(3)


def _hover_and_click(session, element):
    """Move the pointer onto ``element``, then move onto it again and click."""
    webdriver.ActionChains(session).move_to_element(element).perform()
    webdriver.ActionChains(session).move_to_element(element).click().perform()


def run_once(jsobj):
    """Run one Google search and click a result chosen according to ``jsobj``.

    Args:
        jsobj: dict with a ``'kw'`` search string and either

            * ``'domain'``: an iterable of substrings. The first result (with
              link text longer than 10 characters) whose href contains one of
              them is clicked and its 1-based position is inserted into
              ``rank_detection``; or
            * ``'exclude'``: a substring (read only when ``'domain'`` is
              missing or ``None``). A random result whose href does not
              contain it is clicked.

    If no result is clicked-and-recorded in domain mode, or in exclude mode,
    a row with ``ranking`` -1 is inserted. When the search yields no result
    elements at all, the process exits via ``sys.exit()``.

    Exceptions raised while driving the browser are printed, not propagated.
    The WebDriver session is quit on every exit path.
    """
    table = db['rank_detection']
    print(jsobj)
    kw = jsobj['kw']

    driver = _wait_for_webdriver()
    try:
        if jsobj.get('domain') is None:
            exclude = jsobj['exclude']
            domain = None
        else:
            domain = jsobj['domain']
            exclude = None

        driver.get('https://www.google.com?num=100')
        time.sleep(17)

        # Probe the session; if it has died, open a new one and reload.
        while True:
            try:
                print(driver.current_url)
                break
            except Exception:
                traceback.print_exc()
                driver = _wait_for_webdriver()
                time.sleep(3)
                driver.get('https://www.google.com?num=100')
                time.sleep(3)
        time.sleep(3)

        search_box = driver.find_element(By.XPATH, "//input[@name='q']")
        time.sleep(1)
        search_box.send_keys(kw)
        search_box.send_keys(Keys.ENTER)
        time.sleep(6)

        elmts = driver.find_elements(By.XPATH, "//div[@class='yuRUbf']/a")
        numresults = len(elmts)
        print('搜尋結果數量', numresults)
        if numresults == 0:
            print(driver.current_url)
            print(driver.title)
            # SystemExit is not an Exception subclass, so it passes the
            # handler below and terminates the process after cleanup.
            sys.exit()

        candidates = []
        for idx, elmt in enumerate(elmts, start=1):
            # get_attribute may return None; normalise so ``in`` is safe.
            href = elmt.get_attribute('href') or ''
            txt = elmt.text
            if len(txt) <= 10:
                continue
            if domain is not None:
                if any(d in href for d in domain):
                    print('found....')
                    print('clicked....')
                    print(href)
                    print(txt)
                    print("ranking", idx)
                    _hover_and_click(driver, elmt)
                    table.insert({'ranking': idx, 'kw': kw,
                                  'results': numresults, 'url': href,
                                  'title': txt})
                    time.sleep(6)
                    return
            elif exclude not in href:
                candidates.append(elmt)

        if exclude is not None:
            print('exclude')
            # Nothing to click when every result was filtered out.
            if candidates:
                elmt = random.choice(candidates)
                print(elmt)
                _hover_and_click(driver, elmt)
                time.sleep(5)

        # Reaching this point means no ranked hit was recorded above.
        table.insert({'ranking': -1, 'kw': kw, 'results': numresults,
                      'url': '', 'title': '未收錄'})
    except Exception:
        traceback.print_exc()
        print('exception')
    finally:
        if driver is not None:
            try:
                driver.quit()
            except Exception:
                traceback.print_exc()


time.sleep(5)

# Two out of three runs use the first keyword set, one out of three the second.
r = random.randint(0, 2)
if r in (0, 1):
    prefix = "幸福空間 "
    postfix = ' site:hhh.com.tw'
    domain = ['hhh.com.tw']
    positive = ['', '設計', '設計師', '室內 設計', '裝潢', '室內 裝修',
                '設計 公司', '裝潢', '北歐風']
else:
    prefix = ""
    postfix = ' site:hhh.com.tw'
    domain = ['hhh.com.tw']
    positive = ['艾立思', '', '艾立思軟裝', '艾立思集團', '御見設計',
                '艾立思 家具訂製', '艾立思 精品家具', '艾立思 軟裝飾品',
                'ELIZ', '艾立思 郭柏君', '艾立思 家配師', '艾立思 郭柏君']

# Host port mapped to the container's 4444; read by re_get_webdriver().
portnum = random.randint(4444, 4555)
print(portnum)

# Recreate the Selenium container on the freshly chosen port.
os.system('docker container stop p4444')
time.sleep(1)
os.system('docker container rm p4444')
time.sleep(1)
os.system('docker run -d -p ' + str(portnum) + ':4444 --name p4444 --dns 168.95.1.1 selenium/standalone-chrome:103.0')
time.sleep(18)

kw = random.choice(positive)
run_once({'domain': domain, 'kw': prefix + " " + kw})