"""Periodically record where a client's domain ranks in Google search results.

Each run opens a fresh headless Chrome, loads one Google results page for a
keyword, stores every result in ``seo_search_history``, counts titles that
contain any of the client's negative words, and, once the client's domain is
found, clicks the result and stores the ranking in ``seo_jobs_ranking``.
"""

import ast
import codecs
import datetime
import json
import os
import random
import sys
import time
import traceback
import urllib.parse

import dataset
import requests
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.by import By
from selenium.webdriver.common.desired_capabilities import DesiredCapabilities
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait

# Location of the chromedriver binary used to start Chrome.
CHROMEDRIVER_PATH = '/Users/mac/Downloads/119/chromedriver'

# Google search URL template: query, number of results, UI language.
GOOGLE_SEARCH_URL = 'https://www.google.com/search?q={}&num={}&hl={}'
RESULTS_PER_PAGE = 100
SEARCH_LANGUAGE = 'zh-TW'

# XPath of the result links that are ranked.
# NOTE(review): 'yuRUbf' is presumably Google's organic-result container
# class; it is not a stable API and may need updating.
RESULT_LINK_XPATH = "//div[@class='yuRUbf']//a"

# Seconds to wait between two consecutive runs.
RUN_INTERVAL_SECONDS = 80

# The browser currently owned by this module (managed by re_get_webdriver).
driver = None

# NOTE(review): the database password is committed in source. Prefer setting
# SEO_DATABASE_URL in the environment and rotating this credential.
db = dataset.connect(
    os.environ.get(
        'SEO_DATABASE_URL',
        'postgresql://postgres:eyJhbGciOiJI@172.105.241.163:5432/postgres',
    )
)


def re_get_webdriver():
    """Replace the module-level browser with a fresh headless Chrome session.

    A previously created driver is quit first (best effort). The new browser
    runs headless and incognito, with cookies cleared and a 1400x1000 window.

    Returns:
        The new driver, or ``None`` when Chrome could not be started. In the
        failure case the traceback is printed and the module-level ``driver``
        is left as ``None``.
    """
    global driver

    if driver is not None:
        print('closing....')
        try:
            driver.quit()
        except Exception:
            # A dead or already-quit browser must not block creating a new one.
            traceback.print_exc()
        else:
            print('quit....')
        driver = None

    new_driver = None
    try:
        service = Service(CHROMEDRIVER_PATH)
        options = webdriver.ChromeOptions()
        options.add_argument('--headless')
        options.add_argument("--incognito")
        new_driver = webdriver.Chrome(options=options, service=service)
        new_driver.delete_all_cookies()
        new_driver.set_window_size(1400, 1000)
    except Exception:
        traceback.print_exc()
        # Do not leak a Chrome process that started but failed during setup.
        if new_driver is not None:
            try:
                new_driver.quit()
            except Exception:
                traceback.print_exc()
        return None

    driver = new_driver
    return driver


def _acquire_driver():
    """Return a working driver, retrying re_get_webdriver() every 3 seconds."""
    while True:
        browser = re_get_webdriver()
        print('re_get_webdriver')
        if browser is not None:
            return browser
        time.sleep(3)


def run_once(jsobj):
    """Search Google once for a keyword and record the client's ranking.

    Args:
        jsobj: dict with the keys
            ``kw`` (search keyword),
            ``domain`` (substring looked for in each result URL),
            ``client`` (client name stored in ``nda_log``), and
            ``neg_word`` (a Python-literal string, parsed with
            ``ast.literal_eval``, holding the negative words).

    Side effects:
        * inserts one ``seo_search_history`` row per inspected result;
        * inserts one ``nda_log`` row per inspected result (ranking ``-1`` and
          title '未收錄' for results that do not match the domain);
        * on the first matching result, clicks it and inserts one
          ``seo_jobs_ranking`` row including ``avg_neg``, the mean rank of the
          negative-word hits seen so far ('0' when there were none).

    Raises:
        SystemExit: when the results page contains no result links (the page
            URL and title are printed first).
        KeyError, ValueError, SyntaxError: when ``jsobj`` is missing a key or
            ``neg_word`` is not a valid literal.

    Other exceptions raised while scraping are printed and swallowed so that
    the caller's loop can retry later. The browser is always quit on exit.
    The shared ``db`` connection is intentionally NOT closed here: dataset's
    ``Database.close()`` makes the object unusable, and ``db`` is reused by
    every subsequent run.
    """
    print(jsobj)
    neg_words = ast.literal_eval(jsobj['neg_word'])
    print('這裏', neg_words)
    kw = jsobj['kw']
    domain = jsobj['domain']
    client = jsobj['client']

    table = db['seo_jobs_ranking']
    history = db['seo_search_history']
    nda_log = db['nda_log']

    browser = _acquire_driver()
    try:
        googleurl = GOOGLE_SEARCH_URL.format(
            urllib.parse.quote(kw), RESULTS_PER_PAGE, SEARCH_LANGUAGE
        )
        browser.get(googleurl)
        time.sleep(6)  # give the results page time to render
        print(browser.current_url)

        elmts = browser.find_elements(By.XPATH, RESULT_LINK_XPATH)
        numresults = len(elmts)
        print('搜尋結果數量', numresults)
        if numresults == 0:
            # Printed so the operator can see what page was served instead.
            print(browser.current_url)
            print(browser.title)
            sys.exit()

        neg_count = 0  # number of negative-word hits seen so far
        neg_total = 0  # sum of the ranks at which those hits occurred

        for idx, elmt in enumerate(elmts, start=1):
            href = elmt.get_attribute('href')
            txt = elmt.text
            history.insert({'ranking': idx, 'kw': kw, 'results': numresults,
                            'url': href, 'title': txt,
                            'dt': datetime.datetime.now()})

            # Every negative word present in the title counts as one hit.
            hits = sum(1 for word in neg_words if word in txt)
            if hits:
                neg_count += hits
                neg_total += idx * hits
                print('分數', neg_total, neg_count)

            if href and domain in href:
                print('found....')
                print(href)
                print(txt)
                print("ranking", idx)
                nda_log.insert({'ranking': idx, 'kw': kw,
                                'results': numresults, 'url': href,
                                'title': txt, 'dt': datetime.datetime.now(),
                                'client': client})

                # Hover first, then click, as two separate action chains.
                webdriver.ActionChains(browser).move_to_element(elmt).perform()
                webdriver.ActionChains(browser).move_to_element(elmt).click().perform()
                print('clicked....')
                time.sleep(5)

                negstr = '0' if neg_count == 0 else str(neg_total / neg_count)
                print(' negative: ' + negstr)
                table.insert({'ranking': idx, 'kw': kw,
                              'results': numresults, 'url': domain,
                              'title': txt, 'avg_neg': negstr,
                              'dt': datetime.datetime.now()})
                break

            nda_log.insert({'ranking': -1, 'kw': kw, 'results': numresults,
                            'url': href, 'title': '未收錄',
                            'dt': datetime.datetime.now(),
                            'client': client})
    except Exception:
        # SystemExit / KeyboardInterrupt are deliberately not caught here.
        traceback.print_exc()
        print('exception')
    finally:
        browser.quit()


def main():
    """Load the job and negative words for the client, then run forever.

    Exits with a message when the job row or the negative-word row is
    missing. The database connection is closed when the loop ends for any
    reason (including Ctrl-C or the SystemExit raised by run_once).
    """
    try:
        domain = None
        cursor = db.query("select cust, json from public.seo_job where cust='信義房屋' order by random() limit 1")
        for row in cursor:
            # Only the first configured domain of the job is used.
            domain = json.loads(row['json'])['domain'][0]
        if domain is None:
            sys.exit("no seo_job row found for client '信義房屋'")

        neg_word = None
        cursor_n = db.query("select * from public.neg_word where client='信義房屋'")
        for row in cursor_n:
            # When several rows exist the last one wins.
            neg_word = row['neg_word']
        if neg_word is None:
            sys.exit("no neg_word row found for client '信義房屋'")

        # NOTE(review): the search keyword is fixed here; the job's
        # prefix/positive/rnd keyword lists are currently not used.
        while True:
            run_once({'domain': domain, 'kw': '信義 房屋',
                      'client': '信義房屋', 'neg_word': neg_word})
            print('等待下次執行')
            time.sleep(RUN_INTERVAL_SECONDS)
    finally:
        db.close()


if __name__ == "__main__":
    main()