import json
import os
import random
import sys
import time
import traceback

import dataset
import redis
import requests
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
from userAgentRandomizer import userAgents

#import pymysql
#pymysql.install_as_MySQLdb()

# Shared Selenium driver; created lazily by re_get_webdriver().
driver = None

# NOTE(review): the database URL, LINE Notify token and Redis password below
# are hard-coded secrets. They are left untouched here so behavior does not
# change, but they should be moved to configuration/environment and rotated.
db = dataset.connect('mysql://choozmo:pAssw0rd@db.ptt.cx:3306/seo?charset=utf8mb4')

headers = {
    "Authorization": "Bearer " + "6SDULL1Ebklduc6TFxa97AFto5Sj21kyJ30CxiLiSoi",
    "Content-Type": "application/x-www-form-urlencoded"
}


def send_msg(kw):
    """Send *kw* as the ``message`` of a LINE Notify POST request.

    The request uses the module-level ``headers``; the HTTP response is
    ignored.
    """
    params = {"message": kw}
    requests.post("https://notify-api.line.me/api/notify", headers=headers, params=params)


def re_get_webdriver():
    """(Re)create the module-level Chrome ``driver``.

    If a driver already exists it is quit and every ``chrome`` process is
    killed before a new one is built. The proxy is picked at random from a
    list made of one entry drawn from the ``google_proxy`` Redis key plus two
    fixed SOCKS5 endpoints.

    On any failure the traceback is printed and ``driver`` is left as
    ``None``. The function always returns ``None``; callers inspect the
    global ``driver`` instead.
    """
    global driver

    if driver is not None:
        print('closing....')
        try:
            driver.quit()
        except Exception:
            # A dead session must not stop us from killing Chrome and
            # building a replacement driver.
            traceback.print_exc()
        os.system('killall chrome')
        print('quit....')
        driver = None

    try:
        s = Service('/root/driver/chromedriver102')
        options = webdriver.ChromeOptions()
        options.add_argument("--disable-dev-shm-usage")
        options.add_argument("--headless")
        options.add_argument('--remote-debugging-port=9222')
        options.add_experimental_option("debuggerAddress", '127.0.0.1:9927')
        options.add_argument("--incognito")

        # The Redis value is decoded as UTF-8 JSON and one element is chosen
        # at random as a proxy address.
        r = redis.Redis(host='db.ptt.cx', port=6379, db=2, password='choozmo9')
        data = r.get('google_proxy')
        jstext = data.decode('utf-8')
        jsobj = json.loads(jstext)
        proxy = random.choice(jsobj)

        change_ip_list = [
            '--proxy-server=%s' % proxy,
            "--proxy-server=socks5://127.0.0.1:9050",
            "--proxy-server=socks5://192.53.174.202:8180",
        ]
        change_ip = random.choice(change_ip_list)
        options.add_argument(change_ip)
        print('使用代理ip', change_ip)

        driver = webdriver.Chrome(options=options, service=s)
        #driver.delete_all_cookies()
        driver.set_window_size(1400, 1000)
    except Exception:
        # Narrowed from a bare ``except`` so SystemExit/KeyboardInterrupt
        # are no longer swallowed.
        traceback.print_exc()
        driver = None
    return None


def run_once(jsobj):
    """Run one Google search and record or click a result.

    ``jsobj`` must contain ``'kw'`` (the search keyword) and either:

    * ``'domain'`` - the first result whose URL contains this string is
      clicked and its rank is inserted into the ``rank_detection`` table; or
    * ``'exclude'`` - an iterable of substrings; a random result whose URL
      contains none of them (skipping the first five such results) is
      clicked.

    When no domain match is recorded, a row with ``ranking`` -1 is inserted.
    If the search returns no results a LINE notification is sent and the
    process exits. Any other error prints a traceback and exits the process.
    The driver and the database connection are released on every path that
    reaches the search.
    """
    global driver
    table = db['rank_detection']
    print(jsobj)

    if driver is None:
        time.sleep(8)
        re_get_webdriver()
    if driver is None:
        return

    try:
        kw = jsobj['kw']
        if jsobj.get('domain') is None:
            exclude = jsobj['exclude']
            domain = None
        else:
            domain = jsobj['domain']
            exclude = None

        # driver.get('https://www.google.com?num=100')
        driver.get('https://www.google.com?num=20')
        time.sleep(3)
        print(driver.current_url)

        elmt = driver.find_element(By.XPATH, "//input[@name='q']")
        time.sleep(1)
        elmt.send_keys(kw)
        elmt.send_keys(Keys.ENTER)
        time.sleep(6)
        print(driver.current_url)

        elmts = driver.find_elements(By.XPATH, "//div[@class='yuRUbf']/a")
        numresults = len(elmts)
        print('搜尋結果數量', numresults)
        if numresults == 0:
            send_msg('stop working...')
            sys.exit()

        found = False
        test_lst = []
        txt_dict = {}
        # NOTE(review): the rank counts every result anchor, including those
        # whose text is too short to be considered - confirm this matches the
        # intended ranking definition.
        for idx, elmt in enumerate(elmts, start=1):
            href = elmt.get_attribute('href')
            txt = elmt.text
            if len(txt) <= 10:
                continue
            if domain is not None:
                if domain in href:
                    print('found....')
                    print('clicked....')
                    print(href)
                    print(txt)
                    print("ranking", idx)
                    found = True
                    webdriver.ActionChains(driver).move_to_element(elmt).perform()
                    webdriver.ActionChains(driver).move_to_element(elmt).click().perform()
                    table.insert({'ranking': idx, 'kw': kw, 'results': numresults, 'url': href, 'title': txt})
                    time.sleep(6)
                    break
            elif not any(ee in href for ee in exclude):
                test_lst.append(elmt)
                txt_dict[elmt] = txt

        if exclude is not None:
            print('exclude')
            # The first five eligible results are skipped on purpose; when
            # fewer than six exist there is nothing to click, which used to
            # raise IndexError from random.choice([]).
            candidates = test_lst[5:]
            if candidates:
                elmt = random.choice(candidates)
                print(elmt)
                print(txt_dict[elmt])
                webdriver.ActionChains(driver).move_to_element(elmt).perform()
                elmt.click()
                # webdriver.ActionChains(driver).move_to_element(elmt).click().perform()
                time.sleep(5)

        if not found:
            table.insert({'ranking': -1, 'kw': kw, 'results': numresults, 'url': '', 'title': '未收錄'})
    except Exception:
        # Narrowed from a bare ``except`` so the deliberate sys.exit() above
        # is not reported as an error with a traceback.
        print('exception')
        traceback.print_exc()
        sys.exit()
    finally:
        # Release the browser session and DB connection on the exit paths
        # too; previously only the success path cleaned up.
        try:
            driver.quit()
        finally:
            db.close()


# par1=sys.argv[1]
# port=sys.argv[2]

# kws=['職籃','PLG','高雄','鋼鐵人','內幕','中資','股東','姊夫','中國','老賴','香港','無極','原始股東','外資','董事長','股權結構','高雄人','黑人','陳建州','職籃聯盟','球團','球團高層','香港無極','張憲銘','吳同喬','監察人']
kws = ['金融', '人才', '國際接軌', '國際', '投資金童', '投資', '金童', '對沖基金', '香港', '外資', '原始股東', '職籃', 'PLG', '職籃聯盟', '球團', '台灣女婿', '抹紅', '保守', '港元', '美國', '升息', '戰爭', '通膨', '亞洲', '亞洲投資金童']
positive = ['錢濤','亞洲投資金童', '錢濤 職籃夢']

# Restart the 'tiny6' container before searching.
# NOTE(review): presumably this hosts the Chrome instance the driver attaches
# to via debuggerAddress - confirm.
os.system('docker container restart tiny6')

# ``kw`` and ``domain`` are only consumed by the disabled run_once variants
# kept below.
kw = random.choice(kws)
# time.sleep(9)
# run_once({'domain':'ettoday.net','kw':'錢濤'})
# run_once({'exclude':['moreptt.com','ptt.cc','tnews.cc','mirrormedia.mg','newtalk.tw','pourquoi.tw','match.net.tw','freshweekly.tw','z-upload.facebook.com','udn.com'],'kw':kw+' 錢濤'})

domains = ['yahoo.com', 'ettoday.net', 'tvbs.com.tw', 'sina.com.tw', 'ltn.com.tw', 'owlting.com', 'ctee.com.tw']
domain = random.choice(domains)
p = random.choice(positive)

# run_once({'domain':domain,'kw':p})
run_once({'domain': 'ettoday.net', 'kw': p})
# run_once({'domain':domain,'kw':kw+' 錢濤'})