|
@@ -0,0 +1,194 @@
|
|
|
+# import redis
|
|
|
+import time
|
|
|
+import traceback
|
|
|
+# import json
|
|
|
+from selenium import webdriver
|
|
|
+from selenium.webdriver.chrome.service import Service
|
|
|
+from selenium.webdriver.common.desired_capabilities import DesiredCapabilities
|
|
|
+import time
|
|
|
+# import urllib
|
|
|
+import os
|
|
|
+from selenium.webdriver.support.ui import WebDriverWait
|
|
|
+from selenium.webdriver.common.by import By
|
|
|
+from selenium.webdriver.support import expected_conditions as EC
|
|
|
+import dataset
|
|
|
+from selenium.webdriver.common.keys import Keys
|
|
|
+import json
|
|
|
+import random
|
|
|
+import time
|
|
|
+# import redis
|
|
|
+import sys
|
|
|
+import codecs
|
|
|
+import random
|
|
|
+import datetime
|
|
|
+import os
|
|
|
+import time
|
|
|
+import requests
|
|
|
+import urllib.parse
|
|
|
+import ast
|
|
|
+
|
|
|
# Shared Selenium driver handle; re_get_webdriver() replaces it on every call.
driver = None

# SECURITY: credentials should not live in source control. The connection URL
# is read from the SEO_DATABASE_URL environment variable when it is set; the
# previously hard-coded URL is kept only as a fallback so existing deployments
# keep working. TODO(review): rotate this password and drop the fallback.
# A MySQL backend was used here before, in the form
# mysql://<user>:<password>@db.ptt.cx:3306/seo?charset=utf8mb4
db = dataset.connect(os.environ.get(
    'SEO_DATABASE_URL',
    'postgresql://postgres:eyJhbGciOiJI@172.105.241.163:5432/postgres'))
|
|
|
+
|
|
|
def re_get_webdriver(chromedriver_path='/Users/mac/Downloads/119/chromedriver'):
    """Replace the module-level Chrome driver with a fresh headless session.

    Any driver currently stored in the global ``driver`` is quit first. A new
    headless, incognito Chrome session is then started, its cookies are
    cleared and its window is resized to 1400x1000.

    Args:
        chromedriver_path: Path of the chromedriver binary handed to
            ``Service``. Defaults to the location that used to be hard-coded
            in this function.

    Returns:
        The new ``webdriver.Chrome`` instance, which is also stored in the
        global ``driver``; or ``None`` when start-up failed (the traceback is
        printed and the global is reset to ``None``).
    """
    global driver

    if driver is not None:
        print('closing....')
        try:
            driver.quit()
        except Exception:
            # A dead or half-closed session must not block the restart.
            traceback.print_exc()
        print('quit....')
        driver = None

    new_driver = None
    try:
        options = webdriver.ChromeOptions()
        options.add_argument('--headless')
        options.add_argument("--incognito")
        new_driver = webdriver.Chrome(
            options=options, service=Service(chromedriver_path))
        new_driver.delete_all_cookies()
        new_driver.set_window_size(1400, 1000)
    except Exception:
        traceback.print_exc()
        if new_driver is not None:
            # Chrome started but a later setup call failed: shut it down so
            # the browser process is not left running without an owner.
            try:
                new_driver.quit()
            except Exception:
                traceback.print_exc()
        driver = None
        return None

    driver = new_driver
    return driver
|
|
|
+
|
|
|
+
|
|
|
def _average_negative_rank(neg_total, neg_count):
    """Return the mean rank of the negative hits as a string ('0' if none)."""
    if neg_count == 0:
        return '0'
    return str(neg_total / neg_count)


def run_once(jsobj):
    """Run one Google search and record where the target domain ranks.

    The function opens a fresh browser via ``re_get_webdriver()`` (retrying
    every 3 seconds until one is available), loads the first 100 zh-TW Google
    results for the keyword and walks through the result links in order:

    * every result is written to ``seo_search_history``;
    * each negative word found in a result title adds one hit and adds the
      result's rank to a running total;
    * a result whose URL does not contain the target domain is written to
      ``nda_log`` with ranking ``-1``;
    * the first result whose URL contains the target domain is written to
      ``nda_log``, clicked, and summarised in ``seo_jobs_ranking`` together
      with the average rank of the negative hits seen so far; the walk then
      stops.

    The shared module-level ``db`` connection is deliberately left open:
    the script calls this function repeatedly, and closing the connection
    here would leave later calls without a usable database handle.

    Args:
        jsobj: Mapping with the keys
            ``'kw'`` (search keyword),
            ``'client'`` (client name stored in ``nda_log``),
            ``'neg_word'`` (a string holding a Python literal; it is parsed
            with ``ast.literal_eval`` and then iterated, so presumably a list
            of words), and optionally
            ``'domain'`` (substring to look for in result URLs; the
            module-level ``domain`` is used when the key is absent).

    Raises:
        SystemExit: when the result page contains no matching links, as
            requested by the explicit ``sys.exit()`` below.
        ValueError, SyntaxError: when ``jsobj['neg_word']`` is not a valid
            Python literal (parsed before the browser is started).

    Any other exception raised while searching or writing rows is printed
    and swallowed so that the caller's loop can continue. The browser is
    quit in every case.
    """
    table = db['seo_jobs_ranking']
    history = db['seo_search_history']
    nda_log = db['nda_log']

    print(jsobj)
    neg_word = ast.literal_eval(jsobj['neg_word'])
    print('這裏', neg_word)

    # Keep asking for a browser until one starts.
    while True:
        driver = re_get_webdriver()
        print('re_get_webdriver')
        if driver is not None:
            break
        time.sleep(3)

    try:
        kw = jsobj['kw']
        # Prefer the domain supplied by the caller; fall back to the
        # module-level value for callers that do not pass one.
        target_domain = jsobj['domain'] if 'domain' in jsobj else domain

        googleurl = 'https://www.google.com/search?q={}&num={}&hl={}'.format(
            urllib.parse.quote(kw), 100, 'zh-TW')
        driver.get(googleurl)

        # Give the result page time to render before reading it.
        time.sleep(6)
        print(driver.current_url)

        elmts = driver.find_elements(By.XPATH, "//div[@class='yuRUbf']//a")
        numresults = len(elmts)
        print('搜尋結果數量', numresults)
        if numresults == 0:
            # Nothing matched the result-link XPath: print what page we
            # actually got and stop the whole script.
            print(driver.current_url)
            print(driver.title)
            sys.exit()

        neg_count = 0
        neg_total = 0
        for idx, elmt in enumerate(elmts, start=1):
            href = elmt.get_attribute('href')
            txt = elmt.text
            history.insert({'ranking': idx, 'kw': kw, 'results': numresults,
                            'url': href, 'title': txt,
                            'dt': datetime.datetime.now()})

            for word in neg_word:
                print(word)
                if word in txt:
                    neg_count += 1
                    neg_total += idx
                    print('分數', neg_total, neg_count)

            # A link without an href can never match the target domain.
            if not (href and target_domain in href):
                nda_log.insert({'ranking': -1, 'kw': kw,
                                'results': numresults, 'url': href,
                                'title': '未收錄',
                                'dt': datetime.datetime.now(),
                                'client': jsobj['client']})
                continue

            print('found....')
            print(href)
            print(txt)
            print("ranking", idx)
            nda_log.insert({'ranking': idx, 'kw': kw, 'results': numresults,
                            'url': href, 'title': txt,
                            'dt': datetime.datetime.now(),
                            'client': jsobj['client']})

            # Hover first, then click, as two separate action chains.
            webdriver.ActionChains(driver).move_to_element(elmt).perform()
            webdriver.ActionChains(driver).move_to_element(elmt).click().perform()
            print('clicked....')
            time.sleep(5)

            negstr = _average_negative_rank(neg_total, neg_count)
            print(' negative: ' + negstr)
            table.insert({'ranking': idx, 'kw': kw, 'results': numresults,
                          'url': target_domain, 'title': txt,
                          'avg_neg': negstr, 'dt': datetime.datetime.now()})
            break
    except Exception:
        # Exception (not a bare except) so that the sys.exit() above and
        # Ctrl-C are not swallowed.
        print('exception')
        traceback.print_exc()
    finally:
        driver.quit()
|
|
|
+
|
|
|
# Load one randomly chosen job definition and the negative-word list for the
# client, then run the ranking check forever with an 80 second pause between
# runs.
cursor = db.query("select cust, json from public.seo_job where cust='信義房屋' order by random() limit 1")
cursor_n = db.query("select * from public.neg_word where client='信義房屋'")

js_string = None
for c in cursor:
    js_string = c['json']
if js_string is None:
    # Without a job row none of the settings below can be defined.
    sys.exit("no row in public.seo_job for cust='信義房屋'")

js = json.loads(js_string)
prefix = js['prefix']
postfix = js['postfix']
domain = js['domain'][0]
positive = js['positive']
rnd = js['rnd']

kw1 = random.choice(positive)
kw2 = random.choice(rnd)
# NOTE(review): kw is built here but never passed on; the loop below always
# searches the fixed keyword '信義 房屋'. Confirm which one is intended.
kw = prefix + " " + kw1

# When several rows come back, the last one wins.
neg_word = None
for c in cursor_n:
    neg_word = c['neg_word']
if neg_word is None:
    sys.exit("no usable neg_word in public.neg_word for client='信義房屋'")

while True:
    run_once({'domain': domain, 'kw': '信義 房屋', 'client': '信義房屋', 'neg_word': neg_word})
    print('等待下次執行')
    time.sleep(80)
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+
|