"""Poll SEO keywords, check their Google video-search ranking, and log results.

Each cycle picks one random keyword/domain pair from ``public.seo``, opens a
headless Chrome session on the Google video search results page, records every
result in ``seo_search_history``, clicks the result that belongs to the target
domain (if any), and sends a LINE Notify message when the domain is missing or
when configured "negative words" rank too high.

NOTE(review): the statement nesting in this file was reconstructed from a
copy whose indentation had been lost; confirm the placement of the
negative-word scoring block in ``run_once`` against the deployed version.
"""
import ast
import codecs
import datetime
import json
import os
import random
import sys
import time
import traceback
import urllib.parse

import dataset
import requests
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.by import By
from selenium.webdriver.common.desired_capabilities import DesiredCapabilities
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait

# NOTE(security): the fallback values below are credentials that were
# hard-coded in this file. They are kept only so existing deployments keep
# working; set the environment variables and rotate these secrets.
DB_URL = os.environ.get(
    'SEO_DB_URL',
    'postgresql://postgres:eyJhbGciOiJI@172.105.241.163:5432/postgres')
LINE_NOTIFY_TOKEN = os.environ.get(
    'SEO_LINE_NOTIFY_TOKEN',
    'OZDcq7sVKwr3F6YNLtBF3LuIgpa4Ql9eAnBWeD7sHTJ')
CHROMEDRIVER_PATH = os.environ.get(
    'SEO_CHROMEDRIVER_PATH',
    '/Users/mac/Downloads/127/chromedriver')

NOTIFY_URL = "https://notify-api.line.me/api/notify"
NOTIFY_TIMEOUT = 10  # seconds; without a timeout a stalled request blocks forever
CLIENT = '信義房屋'
POLL_INTERVAL = 80  # seconds between two ranking checks

# Last browser handed out by re_get_webdriver(); closed on the next call.
driver = None
db = dataset.connect(DB_URL)

headers = {
    "Authorization": "Bearer " + LINE_NOTIFY_TOKEN,
    "Content-Type": "application/x-www-form-urlencoded"
}


def send_msg(kw):
    """Send *kw* as the ``message`` of a LINE Notify request.

    Raises:
        requests.RequestException: on connection failure or timeout.
    """
    params = {"message": kw}
    print('通知結果', params)
    requests.post(NOTIFY_URL, headers=headers, params=params,
                  timeout=NOTIFY_TIMEOUT)


def _quit_quietly(browser):
    """Quit *browser*, logging instead of raising if the session is already gone."""
    try:
        browser.quit()
    except Exception:
        traceback.print_exc()


def re_get_webdriver():
    """Close the previous Chrome session (if any) and start a fresh one.

    The new session is headless, incognito, has its cookies cleared and a
    1400x1000 window. It is also stored in the module-level ``driver``.

    Returns:
        The new WebDriver, or ``None`` if Chrome could not be started or
        configured (the error is printed, not raised).
    """
    global driver
    if driver is not None:
        print('closing....')
        _quit_quietly(driver)
        print('quit....')
        driver = None

    fresh = None
    try:
        s = Service(CHROMEDRIVER_PATH)
        options = webdriver.ChromeOptions()
        options.add_argument('--headless')
        options.add_argument("--incognito")
        fresh = webdriver.Chrome(options=options, service=s)
        fresh.delete_all_cookies()
        fresh.set_window_size(1400, 1000)
    except Exception:
        traceback.print_exc()
        # Chrome may have started before the failure; do not leak the process.
        if fresh is not None:
            _quit_quietly(fresh)
        driver = None
        return None

    driver = fresh
    return driver


def run_once(jsobj):
    """Run one ranking check for a keyword/domain pair.

    Args:
        jsobj: dict with the keys
            ``kw`` (search keyword), ``domain`` (substring looked for in each
            result URL), ``client`` (customer name written to the logs),
            ``neg_word`` (string holding a Python literal, parsed with
            ``ast.literal_eval``, iterated as the negative words) and
            ``delete_kw_count`` (appended to notification messages).

    Errors raised while browsing or writing to the database are printed and
    swallowed so the polling loop keeps running; the browser is always quit.
    """
    table = db['seo_jobs_ranking']
    history = db['seo_search_history']
    nda_log = db['nda_log']
    delete_kw = db['delete_kw']
    seo = db['seo']
    print(jsobj)
    neg_word = ast.literal_eval(jsobj['neg_word'])
    print('這裏', neg_word)

    # Keep retrying until a browser session can be created.
    while True:
        browser = re_get_webdriver()
        print('re_get_webdriver')
        if browser is not None:
            break
        time.sleep(3)

    try:
        kw = jsobj['kw']
        domain = jsobj['domain']
        googleurl = 'https://www.google.com/search?q={}&num={}&hl={}&gl=tw&tbm=vid&tbs=vd:m'.format(urllib.parse.quote(kw), 100, 'zh-TW')
        browser.get(googleurl)
        time.sleep(6)
        print(browser.current_url)

        elmts = browser.find_elements(By.XPATH, "//div[@class='xe8e1b']//a")
        numresults = len(elmts)
        print('搜尋結果數量', numresults)
        if numresults == 0:
            print(browser.current_url)
            print(browser.title)
            # The original called sys.exit() here, but the surrounding bare
            # `except:` swallowed SystemExit, so the effective behaviour was
            # "give up on this keyword and continue polling". Made explicit.
            return

        found = False
        clickelmt = None
        neg_count = 0
        neg_total = 0
        clickidx = 0
        clickhref = ''
        clicktitle = ''
        href = None
        for idx, elmt in enumerate(elmts, start=1):
            href = elmt.get_attribute('href')
            txt = elmt.text
            history.insert({'ranking': idx, 'kw': kw, 'results': numresults, 'url': href, 'title': txt, 'dt': datetime.datetime.now()})

            # A result counts once, no matter how many negative words it has.
            if any(word in txt for word in neg_word):
                neg_count += 1
                neg_total += idx

            # Anchors without an href yield None; they can never match.
            if href and domain in href:
                print('found....')
                print(href)
                print(txt)
                print("ranking", idx)
                # NOTE(review): later matches overwrite earlier ones, so the
                # recorded ranking is that of the LAST matching result.
                found = True
                clickelmt = elmt
                clickidx = idx
                clickhref = href
                clicktitle = txt

        if not found:
            print('未收錄')
            # 'url' carries the href of the last result, as in the original.
            nda_log.insert({'ranking': -1, 'kw': kw, 'results': numresults, 'url': href, 'title': '未收錄', 'dt': datetime.datetime.now(), 'client': jsobj['client']})
            seo.delete(kw=kw, domain=domain)
            delete_kw.insert({'kw': kw, 'domain': domain, 'cust': jsobj['client'], 'dt': datetime.datetime.now()})
            msg_1 = '未收錄:' + kw + ' ' + domain
            msg_2 = jsobj['delete_kw_count']
            send_msg(msg_1 + "\n" + str(msg_2))
        else:
            nda_log.insert({'ranking': clickidx, 'kw': kw, 'results': numresults, 'url': clickhref, 'title': clicktitle, 'dt': datetime.datetime.now(), 'client': jsobj['client'], 'type': 'vi'})
            webdriver.ActionChains(browser).move_to_element(clickelmt).perform()
            webdriver.ActionChains(browser).move_to_element(clickelmt).click().perform()
            print('clicked....')
            time.sleep(5)

        # Average rank of the results containing a negative word (0 if none).
        negstr = neg_total / neg_count if neg_count else 0
        print(negstr)
        if 0 < negstr < 21:
            print('警示字')
            msg_1 = '警示字:' + kw
            msg_2 = jsobj['delete_kw_count']
            send_msg(msg_1 + "\n" + str(msg_2))
            seo.delete(kw=kw, domain=domain)
            delete_kw.insert({'kw': kw, 'domain': domain, 'cust': jsobj['client'], 'dt': datetime.datetime.now()})

        table.insert(
            {'ranking': clickidx, 'kw': kw, 'results': numresults, 'url': domain, 'title': clicktitle, 'avg_neg': negstr,
             'dt': datetime.datetime.now()})
    except Exception:
        traceback.print_exc()
        print('exception')
    finally:
        _quit_quietly(browser)


def main():
    """Poll forever: pick a random keyword for the client and check it."""
    while True:
        cursor = db.query("select * from public.seo where cust='信義房屋' and type='vi' order by random() limit 1")
        cursor_n = db.query("select * from public.neg_word where client='信義房屋'")
        cursor_d = db.query("select * from public.delete_kw where now()::date = dt::date")

        # The query is limited to one row; keep the last one seen.
        job = None
        for row in cursor:
            job = row

        # Number of keywords deleted today, per known customer.
        d = {'信義房屋': 0, '真理大學': 0, '驊揚': 0, '百威': 0}
        for row in cursor_d:
            if row['cust'] in d:
                d[row['cust']] += 1
        print(d)

        # With several rows the last one wins, as before. With no row, fall
        # back to an empty list literal instead of an unbound/stale variable.
        neg_word = '[]'
        for row in cursor_n:
            neg_word = row['neg_word']

        if job is not None:
            run_once({'domain': job['domain'], 'kw': job['kw'], 'client': CLIENT, 'neg_word': neg_word, 'delete_kw_count': d})
        print('等待下次執行')
        time.sleep(POLL_INTERVAL)


if __name__ == "__main__":
    main()