"""Search Google for a tracked keyword, log the rankings, and click the target.

Running this script (re)starts a Selenium standalone-Chrome Docker container,
picks one keyword entry at random, runs the search through the browser, stores
every result row in ``seo_search_history`` and the target's ranking in
``seo_jobs_ranking``, and finally clicks the matching result.
"""
# import redis
# import urllib
import codecs
import json
import os
import random
import sys
import time
import traceback

import dataset
import requests
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.common.desired_capabilities import DesiredCapabilities
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait

GOOGLE_URL = 'https://www.google.com?num=100'
# Previous selectors, kept for reference: "//input[@name='q']" and
# "//div[@class='yuRUbf']/a".
SEARCH_BOX_XPATH = "//textarea[@type='search']"
RESULT_LINK_XPATH = "//a[@jsname='UWckNb']"

# A result whose title contains any of these terms counts as negative.
NEGATIVE_TERMS = ('坑殺', '侵占', '判決書', '強佔', '掏空', '送達公告', '違反勞動')

# NOTE(review): after a failed connection to the Selenium container the script
# parks for this long before reporting failure. It looks like a debugging hold;
# confirm it is intended before shortening it.
REMOTE_CONNECT_FAILURE_HOLD_SECONDS = 9999

# Only referenced by the commented-out "mobileEmulation" option below.
MOBILE_EMULATION = {
    "deviceMetrics": {"width": 360, "height": 640, "pixelRatio": 3.0},
    "userAgent": "Mozilla/5.0 (Linux; Android 4.2.1; en-us; Nexus 5 Build/JOP40D) AppleWebKit/535.19 (KHTML, like Gecko) Chrome/18.0.1025.166 Mobile Safari/535.19",
}

driver = None
dockername = 'p4444'
is_docker = True
# is_docker=False

# NOTE(security): the connection string embeds a password. It is kept as the
# default so existing deployments keep working, but it can (and should) be
# supplied through the SEO_DB_URL environment variable instead.
db = dataset.connect(os.environ.get(
    'SEO_DB_URL',
    'postgresql://postgres:eyJhbGciOiJI@172.105.241.163:5432/postgres',
))
# db = dataset.connect('mysql://choozmo:pAssw0rd@db.ptt.cx:3306/seo?charset=utf8mb4')


def _start_selenium_container(name):
    """Recreate the Selenium container *name* and return its host port.

    Any existing container with that name is stopped and removed first. The
    commands are best-effort: their exit status is not checked.
    """
    port = random.randint(4444, 4555)
    print(port)
    os.system(f'docker container stop {name}')
    time.sleep(0.5)
    os.system(f'docker container rm {name}')
    time.sleep(0.5)
    os.system(
        f'docker run -d -p {port}:4444 --shm-size=2g --name {name}'
        ' --dns 168.95.1.1 selenium/standalone-chrome:103.0'
    )
    # Fixed wait for the container to come up before the first connection.
    time.sleep(7)
    return port


if is_docker:
    portnum = _start_selenium_container(dockername)


def _release_driver():
    """Quit the shared browser session, if any, and clear the global handle.

    Errors raised by ``quit()`` are printed rather than propagated so that a
    dead session cannot block creating a new one.
    """
    global driver
    if driver is None:
        return
    try:
        driver.quit()
    except Exception:
        traceback.print_exc()
    finally:
        driver = None


def re_get_webdriver():
    """Close the current browser session and open a fresh one.

    Returns:
        The new WebDriver (also stored in the module-level ``driver``), or
        ``None`` when the browser could not be created. When ``is_docker`` is
        set and the remote connection fails, the call first sleeps for
        ``REMOTE_CONNECT_FAILURE_HOLD_SECONDS``.
    """
    global driver
    if driver is not None:
        print('closing....')
        _release_driver()
        print('quit....')

    try:
        options = webdriver.ChromeOptions()
        options.add_argument("--no-sandbox")
        options.add_argument("--headless")
        options.add_argument("--incognito")
        # options.add_argument('--proxy-server=socks5://172.104.92.245:14900')
        # options.add_experimental_option("mobileEmulation", MOBILE_EMULATION)
    except Exception:
        traceback.print_exc()
        return None

    if is_docker:
        try:
            driver = webdriver.Remote(
                command_executor=f'http://127.0.0.1:{portnum}/wd/hub',
                options=options,
            )
        except Exception:
            traceback.print_exc()
            time.sleep(REMOTE_CONNECT_FAILURE_HOLD_SECONDS)
            return None
        return driver

    try:
        driver = webdriver.Chrome(options=options)
    except Exception:
        traceback.print_exc()
        return None
    return driver


def _acquire_driver():
    """Call ``re_get_webdriver`` every 3 seconds until it yields a browser."""
    while True:
        browser = re_get_webdriver()
        print('re_get_webdriver')
        if browser is not None:
            return browser
        time.sleep(3)


def _hover_and_click(browser, element):
    """Move the pointer onto *element*, then move onto it again and click."""
    webdriver.ActionChains(browser).move_to_element(element).perform()
    webdriver.ActionChains(browser).move_to_element(element).click().perform()


def run_once(jsobj):
    """Run one search for ``jsobj['kw']``, record rankings, and click a result.

    Args:
        jsobj: dict with key ``'kw'`` (the search text) and either
            ``'domain'`` (a list of substrings; a result whose URL contains
            one of them is the target) or, when ``'domain'`` is absent or
            ``None``, ``'exclude'`` (a substring; a random result whose URL
            does not contain it is clicked).

    Every result is inserted into ``seo_search_history``. One row is inserted
    into ``seo_jobs_ranking``: the target's ranking, or ranking ``-1`` when no
    target was found. Errors are printed, not raised; the browser session is
    always closed before returning. If the result page has no result links the
    process exits via ``sys.exit()``.
    """
    table = db['seo_jobs_ranking']
    history = db['seo_search_history']
    print(jsobj)
    kw = jsobj['kw']

    browser = _acquire_driver()
    failed = False
    try:
        if jsobj.get('domain') is None:
            exclude = jsobj['exclude']
            domain = None
        else:
            domain = jsobj['domain']
            exclude = None

        browser.get(GOOGLE_URL)
        time.sleep(1)
        # Reading current_url checks that the session is alive; if it is not,
        # reconnect and load the page again.
        while True:
            try:
                print(browser.current_url)
                break
            except Exception:
                traceback.print_exc()
                browser = _acquire_driver()
                time.sleep(3)
                browser.get(GOOGLE_URL)
        time.sleep(3)

        search_box = browser.find_element(By.XPATH, SEARCH_BOX_XPATH)
        time.sleep(1)
        search_box.send_keys(kw)
        search_box.send_keys(Keys.ENTER)
        time.sleep(3)

        elmts = browser.find_elements(By.XPATH, RESULT_LINK_XPATH)
        numresults = len(elmts)
        print('搜尋結果數量', numresults)
        if numresults == 0:
            print(browser.current_url)
            print(browser.title)
            # SystemExit is no longer swallowed by the handler below, so this
            # really ends the process (after the finally clause has run).
            sys.exit()

        found = False
        test_lst = []
        clickelmt = None
        neg_count = 0
        neg_total = 0
        clickidx = 0
        clickhref = ''
        clicktitle = ''
        for idx, elmt in enumerate(elmts, start=1):
            href = elmt.get_attribute('href')
            txt = elmt.text
            history.insert({'ranking': idx, 'kw': kw, 'results': numresults, 'url': href, 'title': txt})
            if any(term in txt for term in NEGATIVE_TERMS):
                neg_count += 1
                neg_total += idx
            # Results with a short title or without an href are logged above
            # but are never treated as a target or click candidate.
            if len(txt) <= 10 or not href:
                continue
            if domain is not None:
                # Iterate a shuffled copy so the caller's list is not
                # reordered. There is no break: when several results match,
                # the last one in page order is recorded and clicked.
                for d in random.sample(domain, len(domain)):
                    if d in href:
                        print('found....')
                        print('clicked....')
                        print(href)
                        print(txt)
                        print("ranking", idx)
                        found = True
                        clickelmt = elmt
                        clickidx = idx
                        clickhref = href
                        clicktitle = txt
            elif exclude not in href:
                test_lst.append(elmt)

        if exclude is not None:
            print('exclude')
            if test_lst:
                elmt = random.choice(test_lst)
                print(elmt)
                _hover_and_click(browser, elmt)
                time.sleep(5)
            else:
                print('no candidate results to click')

        # Average ranking position of the negative results ('0' when none).
        negstr = '0' if neg_count == 0 else str(neg_total / neg_count)
        print(' negative: ' + negstr)

        if not found:
            table.insert({'ranking': -1, 'kw': kw, 'results': numresults, 'url': '', 'title': '未收錄'})
        else:
            _hover_and_click(browser, clickelmt)
            print('clicked...')
            table.insert({'ranking': clickidx, 'kw': kw, 'results': numresults, 'url': clickhref, 'title': clicktitle, 'avg_neg': negstr})
            time.sleep(6)
            print('sleep 6')
    except Exception:
        failed = True
        print('exception')
        traceback.print_exc()
    finally:
        # Close the browser on every path, including success and sys.exit().
        _release_driver()
    if failed:
        time.sleep(5)


# r=random.randint(0,7)
r = 987

# JNOTE: keyword clicks
related = ''
entries = [
    {'cust': '啟翔', 'plan': '文章', 'prefix': '', 'kw': '1111 4970-808', 'postfix': '', 'domain': ['Comp_Info.aspx?vNo=48173'], 'positive': ['']},
    {'cust': '啟翔', 'plan': '文章', 'prefix': '', 'kw': '作業員 4970808', 'postfix': '', 'domain': ['job/3uhrh'], 'positive': ['']},
    {'cust': '啟翔', 'plan': '文章', 'prefix': '', 'kw': 'facebook 100078739363391 啟翔', 'postfix': '', 'domain': ['100078739363391'], 'positive': ['']},
    {'cust': '啟翔', 'plan': '文章', 'prefix': '', 'kw': '廢水專責人員 104 啟翔', 'postfix': '', 'domain': ['job/7wih0'], 'positive': ['']},
    {'cust': '加百裕', 'plan': '文章', 'prefix': '', 'kw': '黃靖容中醫', 'postfix': '', 'domain': ['web.csh.org.tw'], 'positive': ['']},
]

entry = random.choice(entries)
# Author's notes (presumably other names to track): 朱英凱, 琢隱設計
# os.system('curl --socks5 choozmo:choozmo9@172.104.92.245:14900 http://www.google.com')

newkw = entry['prefix'] + " " + entry['kw'] + ' ' + random.choice(entry['positive'])
print(newkw)
# newkw=kw
run_once({'domain': entry['domain'], 'kw': newkw})