"""Pick one random SEO job, search Google for its keyword and click a result.

Running (or importing) this module has side effects, in this order:

1. connects to the database,
2. when ``is_docker`` is true, (re)starts a ``selenium/standalone-chrome``
   container published on a random local port,
3. takes one random row from ``seo_random_test_jobs`` and deletes it,
4. calls :func:`run_once` with a keyword built from that row.
"""
import ast
import codecs
import json
import os
import random
import sys
import time
import traceback

import dataset
import requests
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.common.desired_capabilities import DesiredCapabilities
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait

# Google start page used for every search session.
GOOGLE_URL = 'https://www.google.com?num=100'

# A result title containing any of these substrings counts as "negative".
NEGATIVE_KEYWORDS = (
    '坑殺', '侵占', '判決書', '強佔', '掏空',
    '送達公告', '違反勞動', '不倫', '開房', '摩鐵',
)

# Mobile-emulation profile.  Currently NOT applied to the browser; to enable
# it, pass it to options.add_experimental_option("mobileEmulation", ...).
MOBILE_EMULATION = {
    "deviceMetrics": {"width": 360, "height": 640, "pixelRatio": 3.0},
    "userAgent": "Mozilla/5.0 (Linux; Android 4.2.1; en-us; Nexus 5 Build/JOP40D) AppleWebKit/535.19 (KHTML, like Gecko) Chrome/18.0.1025.166 Mobile Safari/535.19",
}

# Most recently created WebDriver; owned by re_get_webdriver().
driver = None

dockername = 'p4444'
is_docker = True

# NOTE(review): the database password is hard-coded in this URL; consider
# moving it to configuration or an environment variable.
db = dataset.connect('postgresql://postgres:eyJhbGciOiJI@172.105.241.163:5432/postgres')

if is_docker:
    # Replace any previous container with a fresh one on a random host port.
    portnum = random.randint(4444, 4555)
    print(portnum)
    os.system('docker container stop ' + dockername)
    time.sleep(0.5)
    os.system('docker container rm ' + dockername)
    time.sleep(0.5)
    os.system('docker run -d -p ' + str(portnum) + ':4444 --shm-size=2g --name ' + dockername + ' --dns 168.95.1.1 selenium/standalone-chrome:103.0')
    # Fixed pause before the first connection attempt to the new container.
    time.sleep(7)


def _quit_quietly(drv):
    """Quit *drv*; print the traceback instead of raising if quitting fails."""
    if drv is None:
        return
    try:
        drv.quit()
    except Exception:
        # A driver is usually replaced precisely because its session broke,
        # so a failing quit() must not abort the recovery.
        traceback.print_exc()


def _acquire_driver():
    """Call re_get_webdriver() every 3 seconds until it returns a driver."""
    while True:
        drv = re_get_webdriver()
        print('re_get_webdriver')
        if drv is not None:
            return drv
        time.sleep(3)


def _move_and_click(drv, elmt):
    """Move the pointer onto *elmt*, then move onto it again and click."""
    webdriver.ActionChains(drv).move_to_element(elmt).perform()
    webdriver.ActionChains(drv).move_to_element(elmt).click().perform()


def _is_negative_title(txt):
    """Return True if *txt* contains any of NEGATIVE_KEYWORDS."""
    return any(word in txt for word in NEGATIVE_KEYWORDS)


def re_get_webdriver():
    """Discard the current global driver and create a new headless Chrome.

    With ``is_docker`` true the driver is a ``webdriver.Remote`` talking to
    ``http://127.0.0.1:<portnum>/wd/hub``; otherwise a local
    ``webdriver.Chrome`` is started.

    Returns:
        The new driver (also stored in the module-level ``driver``), or
        ``None`` if it could not be created.  A failed Remote connection
        additionally sleeps 9999 seconds before returning ``None``.
    """
    global driver

    if driver is not None:
        print('closing....')
        _quit_quietly(driver)
        print('quit....')
        driver = None

    try:
        options = webdriver.ChromeOptions()
        options.add_argument("--no-sandbox")
        options.add_argument("--headless")
        options.add_argument("--incognito")
    except Exception:
        traceback.print_exc()
        return None

    # `except Exception` (not a bare except) so Ctrl-C and sys.exit() are
    # never swallowed by the retry machinery.
    if is_docker:
        try:
            driver = webdriver.Remote(
                command_executor='http://127.0.0.1:' + str(portnum) + '/wd/hub',
                options=options)
        except Exception:
            traceback.print_exc()
            driver = None
            # Long-standing behaviour: park the process for 9999 s after a
            # failed connection before reporting the failure to the caller.
            time.sleep(9999)
            return None
        return driver

    try:
        driver = webdriver.Chrome(options=options)
    except Exception:
        traceback.print_exc()
        driver = None
        return None
    return driver


def run_once(jsobj):
    """Search Google for ``jsobj['kw']`` and click one of the results.

    Args:
        jsobj: job description dict.
            * ``'kw'`` – text typed into the search box (required).
            * ``'domain'`` – iterable of substrings; a result whose link
              contains any of them is the click target.  When several
              results match, the last one in the result list is clicked.
            * ``'exclude'`` – read only when ``'domain'`` is missing or
              ``None``; a random result whose link does NOT contain this
              substring is clicked instead.
            * ``'cust'``, ``'plan'`` – read only when a matching result
              was found; stored in the ``seo_jobs`` row.

    A row is inserted into ``seo_jobs`` only when a ``domain`` match was
    clicked and the page listed at least 40 results.  When the result page
    has no results at all, the process exits via ``sys.exit()``.  Any other
    error is printed and swallowed.  The browser session is always closed
    before the function returns or exits.
    """
    table = db['seo_jobs']
    print(jsobj)
    kw = jsobj['kw']

    driver = _acquire_driver()
    failed = False
    try:
        if jsobj.get('domain') is None:
            exclude = jsobj['exclude']
            domain = None
        else:
            domain = jsobj['domain']
            exclude = None

        driver.get(GOOGLE_URL)
        time.sleep(1)
        # Reading current_url is used as a liveness probe; on failure get a
        # brand-new browser and load the start page again.
        while True:
            try:
                print(driver.current_url)
                break
            except Exception:
                traceback.print_exc()
                driver = _acquire_driver()
                time.sleep(3)
                driver.get(GOOGLE_URL)

        time.sleep(3)
        search_box = driver.find_element(By.XPATH, "//textarea[@type='search']")
        time.sleep(1)
        search_box.send_keys(kw)
        search_box.send_keys(Keys.ENTER)
        time.sleep(3)
        elmts = driver.find_elements(By.XPATH, "//a[@jsname='UWckNb']")

        numresults = len(elmts)
        print('搜尋結果數量', numresults)
        if numresults == 0:
            print(driver.current_url)
            print(driver.title)
            sys.exit()

        found = False
        test_lst = []
        clickelmt = None
        neg_count = 0
        neg_total = 0
        for idx, elmt in enumerate(elmts, start=1):
            # get_attribute may return None; substring tests need a str.
            href = elmt.get_attribute('href') or ''
            txt = elmt.text
            if _is_negative_title(txt):
                neg_count += 1
                neg_total += idx
            if len(txt) <= 10:
                continue
            if domain is not None:
                if any(d in href for d in domain):
                    print('found....')
                    print('clicked....')
                    print(href)
                    print(txt)
                    print("ranking", idx)
                    found = True
                    # Not breaking: a later match overrides an earlier one.
                    clickelmt = elmt
            elif exclude not in href:
                test_lst.append(elmt)

        if exclude is not None:
            print('exclude')
            if test_lst:
                candidate = random.choice(test_lst)
                print(candidate)
                _move_and_click(driver, candidate)
                time.sleep(5)
            else:
                print('exclude: no candidate result to click')

        # Average rank of the negative results ('0' when there are none).
        negstr = '0' if neg_count == 0 else str(neg_total / neg_count)
        print(' negative: ' + negstr)
        if neg_total > 0:
            print('negative.....')

        if not found:
            print('not found')
        else:
            _move_and_click(driver, clickelmt)
            print('clicked...')
            entry = {'cust': jsobj['cust'], 'plan': jsobj['plan'], 'prefix': '', 'postfix': '', 'domain': str(domain), 'kw': kw, 'positive': str([''])}
            if numresults >= 40:
                print(entry)
                table.insert(entry)
            time.sleep(6)
            print('sleep 6')
    except Exception:
        failed = True
        traceback.print_exc()
        print('exception')
    finally:
        # Always release the browser session, including on the success path
        # and when sys.exit() is propagating.
        _quit_quietly(driver)

    if failed:
        time.sleep(5)


# Not referenced anywhere in this file.
r = 987

# JNOTE: keyword click
related = ''

job = None
for job in db.query('SELECT id,cust,plan,prefix,domain,kw,positive FROM public.seo_random_test_jobs order by random() limit 1'):
    break
if job is None:
    sys.exit('seo_random_test_jobs returned no rows; nothing to do')

job_id = job['id']
cust = job['cust']
kw = job['kw']
plan = job['plan']
prefix = job['prefix']
# SECURITY: these columns were previously passed to eval(), which executes
# arbitrary code stored in the database.  literal_eval only accepts Python
# literals (e.g. "['a.com', 'b.com']") and raises ValueError otherwise.
domain = ast.literal_eval(job['domain'])
positive = ast.literal_eval(job['positive'])

# The job is removed from the queue before it is executed.
print('delete from seo_random_test_jobs where id=' + str(job_id) + '')
db.query('delete from seo_random_test_jobs where id=:job_id', job_id=job_id)

# An empty `positive` list simply contributes no suffix.
suffix = random.choice(positive) if positive else ''
newkw = prefix + " " + kw + ' ' + suffix
print(newkw)

run_once({'domain': domain, 'kw': newkw, 'id': job_id, 'cust': cust, 'plan': plan, 'positive': positive})