"""Search Google for a keyword, click a target result, then click a whitelisted link.

The script picks one random row from ``seo_doublejob``, searches Google for
its keyword through a (optionally Dockerised) Selenium Chrome session, clicks
the organic result whose URL contains the job's target URL fragment, records
the ranking in ``seo_jobs_ranking``, and finally clicks one link on the
landing page whose href contains an entry of ``seo_whitelist``.

Everything runs at import time; this file is meant to be executed as a script.
"""
import codecs
import datetime
import json
import os
import platform
import random
import socket
import sys
import time
import traceback

import dataset
import requests
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.common.desired_capabilities import DesiredCapabilities
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait

# JNOTE: CLICK TWICE
driver = None
dockername = 'p4444'
is_docker = True

# Entry page used for every search.
GOOGLE_URL = 'https://www.google.com?num=100'

# A result title containing any of these terms counts as a negative result.
NEGATIVE_TERMS = ('坑殺', '侵占', '判決書', '強佔', '掏空', '送達公告', '違反勞動')

# SECURITY NOTE(review): the database credential is hard-coded in the source.
# It is kept so the script behaves as before, but it should be rotated and
# loaded from the environment or a secrets store.
db = dataset.connect('postgresql://postgres:eyJhbGciOiJI@172.105.241.163:5432/postgres')

# Record this launch (host description, program name, timestamp).
table = db['prog_launch']
unamestr = str(platform.uname())
table.insert({'uname': unamestr, 'progname': os.path.basename(__file__), 'dt': datetime.datetime.now()})

# URL fragments that click_allowed() is permitted to click.
url_white = []
cursor = db.query("select url from seo_whitelist where cust='啟翔'")
for c in cursor:
    url_white.append(c['url'])

print(url_white)
if len(url_white) <= 0:
    print('url_white loading failed')
    sys.exit()


def scrolling(driver, pgnum):
    """Send PAGE_DOWN to the page body ``pgnum`` times.

    A 0.3 second pause follows each key press when more than one page is
    requested.
    """
    ub = driver.find_element(By.CSS_SELECTOR, 'body')
    for _ in range(pgnum):
        ub.send_keys(Keys.PAGE_DOWN)
        if pgnum > 1:
            time.sleep(0.3)


if is_docker:
    # Replace any previous container with a fresh standalone-chrome instance
    # published on a random host port.
    portnum = random.randint(4444, 4555)
    print(portnum)
    os.system('docker container stop ' + dockername)
    time.sleep(0.5)
    os.system('docker container rm ' + dockername)
    time.sleep(0.5)
    os.system('docker run -d -p ' + str(portnum) + ':4444 --shm-size=2g --name ' + dockername + ' --dns 168.95.1.1 selenium/standalone-chrome:103.0')
    # Give the Selenium server time to start listening.
    time.sleep(7)


def re_get_webdriver():
    """Quit the current global driver (if any) and create a new one.

    Returns the new driver, which is also stored in the module-level
    ``driver``, or ``None`` when it could not be created.
    """
    global driver
    global portnum
    global is_docker

    if driver is not None:
        print('closing....')
        try:
            driver.quit()
        except Exception:
            # The old session may already be gone (for example it was quit
            # by run_once); that must not prevent creating a new one.
            traceback.print_exc()
        print('quit....')
        driver = None

    try:
        options = webdriver.ChromeOptions()
        options.add_argument("--no-sandbox")
        options.add_argument("--headless")
        options.add_argument("--incognito")
        # The SOCKS proxy and mobile-emulation options were disabled in the
        # original script and are intentionally not configured here.

        if is_docker:
            try:
                driver = webdriver.Remote(
                    command_executor='http://127.0.0.1:' + str(portnum) + '/wd/hub',
                    options=options)
            except Exception:
                traceback.print_exc()
                # NOTE(review): the very long stall is kept from the original;
                # it looks deliberate (park the process when the container is
                # unreachable) -- confirm before shortening.
                time.sleep(9999)
                return None
            return driver

        try:
            driver = webdriver.Chrome(options=options)
        except Exception:
            traceback.print_exc()
            return None
        return driver
    except Exception:
        traceback.print_exc()
        driver = None
        return None


def click_allowed():
    """Click one randomly chosen link on the current page that is whitelisted.

    Uses the module-level ``driver``. A link qualifies when its href contains
    any entry of ``url_white``. At most one link is clicked.
    """
    global driver
    global url_white

    elmt_lst = list(driver.find_elements(By.XPATH, "//a"))
    random.shuffle(elmt_lst)
    for elmt in elmt_lst:
        href = elmt.get_attribute('href')
        if not href:
            # Anchors without an href return None; a substring test on None
            # would raise TypeError.
            continue
        for url in url_white:
            if url in href:
                print(href)
                webdriver.ActionChains(driver).move_to_element(elmt).perform()
                time.sleep(1)
                webdriver.ActionChains(driver).move_to_element(elmt).click().perform()
                time.sleep(5)
                return


def run_once(jsobj):
    """Run one search-and-click job.

    ``jsobj`` must contain ``'kw'`` (the search keyword) and either
    ``'domain'`` (a list of URL fragments identifying the target result) or,
    when ``'domain'`` is absent or None, ``'exclude'`` (a URL fragment; a
    random result not containing it is clicked instead).

    The outcome is written to the ``seo_jobs_ranking`` table. Exceptions
    raised while browsing are printed, the driver is quit, and the function
    returns None.
    """
    table = db['seo_jobs_ranking']
    print(jsobj)
    kw = jsobj['kw']

    # Keep trying until a browser session is available.
    while True:
        driver = re_get_webdriver()
        print('re_get_webdriver')
        if driver is not None:
            break
        time.sleep(3)

    try:
        if jsobj.get('domain') is None:
            exclude = jsobj['exclude']
            domain = None
        else:
            domain = jsobj['domain']
            exclude = None

        driver.get(GOOGLE_URL)
        time.sleep(1)

        # Reading current_url verifies the session is alive; on failure get a
        # new driver and load the page again.
        while True:
            try:
                print(driver.current_url)
                break
            except Exception:
                traceback.print_exc()
                driver = re_get_webdriver()
                time.sleep(3)
                if driver is None:
                    continue
                driver.get(GOOGLE_URL)
        time.sleep(3)

        elmt = driver.find_element(By.XPATH, "//textarea[@name='q']")
        time.sleep(1)
        elmt.send_keys(kw)
        elmt.send_keys(Keys.ENTER)
        time.sleep(3)

        elmts = driver.find_elements(By.XPATH, "//a[@jsname='UWckNb']")
        numresults = len(elmts)
        print('搜尋結果數量', numresults)
        if numresults == 0:
            # Nothing to rank: log where we ended up, release the browser and
            # stop. (The original called sys.exit() here, which the enclosing
            # bare except swallowed, so the net effect was also quit + return.)
            print(driver.current_url)
            print(driver.title)
            driver.quit()
            return

        idx = 1
        found = False
        test_lst = []
        clickelmt = None
        neg_count = 0
        neg_total = 0
        clickidx = 0
        clickhref = ''
        clicktitle = ''
        for elmt in elmts:
            href = elmt.get_attribute('href')
            txt = elmt.text
            if any(term in txt for term in NEGATIVE_TERMS):
                neg_count += 1
                neg_total += idx
            # Results without an href cannot match anything, but they still
            # occupy a ranking position, so idx is advanced below regardless.
            if len(txt) > 10 and href:
                if domain is not None:
                    if any(d in href for d in domain):
                        print('found....')
                        print('clicked....')
                        print(href)
                        print(txt)
                        print("ranking", idx)
                        # NOTE(review): as in the original, a later match
                        # overwrites an earlier one, so the last matching
                        # result is the one clicked and recorded.
                        found = True
                        clickelmt = elmt
                        clickidx = idx
                        clickhref = href
                        clicktitle = txt
                elif exclude not in href:
                    test_lst.append(elmt)
            idx += 1

        if exclude is not None:
            print('exclude')
            if test_lst:
                elmt = random.choice(test_lst)
                print(elmt)
                webdriver.ActionChains(driver).move_to_element(elmt).perform()
                webdriver.ActionChains(driver).move_to_element(elmt).click().perform()
                scrolling(driver, 3)
                time.sleep(5)

        # Average ranking position of the negative results, as a string.
        if neg_count == 0:
            negstr = '0'
        else:
            negstr = str(neg_total / neg_count)
        print(' negative: ' + negstr)

        if not found:
            table.insert({'ranking': -1, 'kw': kw, 'results': numresults, 'url': '', 'title': '未收錄'})
        else:
            webdriver.ActionChains(driver).move_to_element(clickelmt).perform()
            webdriver.ActionChains(driver).move_to_element(clickelmt).click().perform()
            print('clicked...')
            table.insert({'ranking': clickidx, 'kw': kw, 'results': numresults, 'url': clickhref, 'title': clicktitle, 'avg_neg': negstr})
            # NOTE(review): the source's indentation was lost; the follow-up
            # scroll and whitelist click are assumed to belong to the "found"
            # branch, since they act on the landing page -- confirm.
            scrolling(driver, 3)
            time.sleep(6)
            print('sleep 6')
            click_allowed()
        return
    except Exception:
        traceback.print_exc()
        print('exception')
        if driver is not None:
            driver.quit()
        time.sleep(5)


# JNOTE: keyword click job
related = ''
cust = None
kw = None
domain = None
plan = ''
prefix = ''
positive = ['']

# Pick one random job for this customer.
cursor = db.query("SELECT cust,kw,url FROM public.seo_doublejob where cust='啟翔' order by random() limit 1")
for c in cursor:
    cust = c['cust']
    kw = c['kw']
    plan = ''
    prefix = ''
    domain = [c['url']]
    positive = ['']
    break

# Manual override switch: set r to 11 to run the fixed job below instead.
r = 9999
if r == 11:
    cust = '啟翔'
    plan = '形象SEO'
    postfix = ''
    domain = ['abba-tech-aluminum']
    kw = '啟翔輕金屬產品'
    prefix = ''
    positive = ['']

if kw is None or domain is None:
    # The query returned no row; without this check the script would die
    # with a NameError further down.
    print('seo_doublejob loading failed')
    sys.exit()

newkw = kw
print(newkw)
run_once({'domain': domain, 'kw': newkw})