|
@@ -30,8 +30,19 @@ import ast
|
|
|
# Local import: only this configuration block needs `os`, and the file's
# import section is outside the visible part of the source.
import os

# Shared Selenium driver handle; assigned elsewhere in this file.
driver = None

# SECURITY NOTE(review): the connection string and the LINE Notify token below
# are secrets committed to source control. They are kept only as fallbacks so
# existing deployments keep working; set SEO_DATABASE_URL / LINE_NOTIFY_TOKEN
# in the environment and rotate the committed values.
db = dataset.connect(
    os.environ.get(
        'SEO_DATABASE_URL',
        'postgresql://postgres:eyJhbGciOiJI@172.105.241.163:5432/postgres',
    )
)

# HTTP headers sent with every LINE Notify request made by send_msg().
headers = {
    "Authorization": "Bearer " + os.environ.get(
        'LINE_NOTIFY_TOKEN',
        "OZDcq7sVKwr3F6YNLtBF3LuIgpa4Ql9eAnBWeD7sHTJ",
    ),
    "Content-Type": "application/x-www-form-urlencoded"
}
|
|
|
+
|
|
|
def send_msg(kw, timeout=10):
    """Send a text notification through the LINE Notify HTTP API.

    Args:
        kw: Message text; sent as the ``message`` request parameter.
        timeout: Seconds to wait for the HTTP request before giving up.
            Without a timeout a stalled connection would block the caller
            indefinitely.

    Returns:
        None. A non-success HTTP status is reported on stdout instead of
        being silently ignored.
    """
    params = {"message": kw}
    print('通知結果', params)
    r = requests.post(
        "https://notify-api.line.me/api/notify",
        headers=headers,
        params=params,
        timeout=timeout,
    )
    if not r.ok:
        # Surface rejected notifications (e.g. bad token) without raising,
        # so the caller's control flow is unchanged for HTTP-level errors.
        print('LINE Notify request failed:', r.status_code, r.text)
|
|
|
|
|
|
def re_get_webdriver():
|
|
|
global port
|
|
@@ -45,7 +56,7 @@ def re_get_webdriver():
|
|
|
print('quit....')
|
|
|
driver = None
|
|
|
try:
|
|
|
- s = Service('/Users/mac/Downloads/119/chromedriver')
|
|
|
+ s = Service('/Users/mac/Downloads/123/chromedriver')
|
|
|
options = webdriver.ChromeOptions()
|
|
|
options.add_argument('--headless')
|
|
|
|
|
@@ -66,6 +77,9 @@ def run_once(jsobj):
|
|
|
table = db['seo_jobs_ranking']
|
|
|
history = db['seo_search_history']
|
|
|
nda_log = db['nda_log']
|
|
|
+ delete_kw = db['delete_kw']
|
|
|
+ seo = db['seo']
|
|
|
+
|
|
|
|
|
|
print(jsobj)
|
|
|
neg_word = ast.literal_eval(jsobj['neg_word'])
|
|
@@ -79,7 +93,9 @@ def run_once(jsobj):
|
|
|
time.sleep(3)
|
|
|
try:
|
|
|
kw = jsobj['kw']
|
|
|
- googleurl = 'https://www.google.com/search?q={}&num={}&hl={}'.format(urllib.parse.quote(kw), 100, 'zh-TW')
|
|
|
+ domain = jsobj['domain']
|
|
|
+ # googleurl = 'https://www.google.com/search?q={}&num={}&hl={}&gl=tw'.format(urllib.parse.quote(kw), 100, 'zh-TW')
|
|
|
+ googleurl = 'https://www.google.com/search?q={}&num={}&hl={}&gl=tw&tbm=vid&tbs=vd:m'.format(urllib.parse.quote(kw), 100, 'zh-TW')
|
|
|
driver.get(googleurl)
|
|
|
|
|
|
time.sleep(6)
|
|
@@ -90,7 +106,8 @@ def run_once(jsobj):
|
|
|
# elmt.send_keys(Keys.ENTER)
|
|
|
# time.sleep(6)
|
|
|
|
|
|
- elmts = driver.find_elements(By.XPATH, "//div[@class='yuRUbf']//a")
|
|
|
+ # elmts = driver.find_elements(By.XPATH, "//div[@class='yuRUbf']//a")
|
|
|
+ elmts = driver.find_elements(By.XPATH, "//div[@class='xe8e1b']//a")
|
|
|
|
|
|
numresults = len(elmts)
|
|
|
|
|
@@ -102,7 +119,7 @@ def run_once(jsobj):
|
|
|
# time.sleep(9999)
|
|
|
|
|
|
idx = 1
|
|
|
- found = False
|
|
|
+ found = 0
|
|
|
test_lst = []
|
|
|
clickelmt = None
|
|
|
neg_count = 0
|
|
@@ -112,6 +129,7 @@ def run_once(jsobj):
|
|
|
clicktitle = ''
|
|
|
for elmt in elmts:
|
|
|
href = elmt.get_attribute('href')
|
|
|
+ # print(href)
|
|
|
txt = elmt.text
|
|
|
history.insert({'ranking': idx, 'kw': kw, 'results': numresults, 'url': href, 'title': txt,'dt':datetime.datetime.now()})
|
|
|
# if '坑殺' in txt or '侵占' in txt or '判決書' in txt or '強佔' in txt or '掏空' in txt or '送達公告' in txt or '違反勞動'in txt:
|
|
@@ -119,14 +137,13 @@ def run_once(jsobj):
|
|
|
# neg_total+=idx
|
|
|
# print('分數',neg_total, neg_count)
|
|
|
for i in neg_word:
|
|
|
- print(i)
|
|
|
if i in txt:
|
|
|
neg_count += 1
|
|
|
neg_total += idx
|
|
|
- print('分數',neg_total, neg_count)
|
|
|
+ break
|
|
|
+ # print('分數',neg_total, neg_count)
|
|
|
if domain in href:
|
|
|
print('found....')
|
|
|
-
|
|
|
print(href)
|
|
|
print(txt)
|
|
|
print("ranking", idx)
|
|
@@ -135,60 +152,83 @@ def run_once(jsobj):
|
|
|
clickidx = idx
|
|
|
clickhref = href
|
|
|
clicktitle = txt
|
|
|
- nda_log.insert({'ranking': idx, 'kw': kw, 'results': numresults, 'url': href, 'title': txt,'dt': datetime.datetime.now(), 'client': jsobj['client']})
|
|
|
- webdriver.ActionChains(driver).move_to_element(elmt).perform()
|
|
|
- webdriver.ActionChains(driver).move_to_element(elmt).click().perform()
|
|
|
- print('clicked....')
|
|
|
- time.sleep(5)
|
|
|
- if neg_count == 0:
|
|
|
- negstr = '0'
|
|
|
- else:
|
|
|
- negstr = str(neg_total / neg_count)
|
|
|
- print(' negative: ' + negstr)
|
|
|
- table.insert({'ranking': clickidx, 'kw': kw, 'results': numresults, 'url': domain, 'title': clicktitle,
|
|
|
- 'avg_neg': negstr, 'dt': datetime.datetime.now()})
|
|
|
- db.close()
|
|
|
- break
|
|
|
+
|
|
|
+ found = 1
|
|
|
else:
|
|
|
- nda_log.insert({'ranking': -1, 'kw': kw, 'results': numresults, 'url': href, 'title': '未收錄','dt': datetime.datetime.now(), 'client': jsobj['client']})
|
|
|
+ if found == 1:
|
|
|
+ not_found = 0
|
|
|
+ else:
|
|
|
+ not_found = 1
|
|
|
idx += 1
|
|
|
|
|
|
- db.close()
|
|
|
+ if not_found == 1:
|
|
|
+ print('未收錄')
|
|
|
+ nda_log.insert({'ranking': -1, 'kw': kw, 'results': numresults, 'url': href, 'title': '未收錄','dt': datetime.datetime.now(), 'client': jsobj['client']})
|
|
|
+ seo.delete(kw=kw, domain=domain)
|
|
|
+ delete_kw.insert({'kw':kw,'domain':domain,'cust':jsobj['client'], 'dt':datetime.datetime.now()})
|
|
|
+ msg_1 = '未收錄:'+kw+' '+domain
|
|
|
+ msg_2 = jsobj['delete_kw_count']
|
|
|
+ send_msg(msg_1 + "\n" + str(msg_2))
|
|
|
+ else:
|
|
|
+ nda_log.insert({'ranking': clickidx, 'kw': kw, 'results': numresults, 'url': clickhref, 'title': clicktitle,'dt': datetime.datetime.now(), 'client': jsobj['client'], 'type':'vi'})
|
|
|
+ webdriver.ActionChains(driver).move_to_element(clickelmt).perform()
|
|
|
+ webdriver.ActionChains(driver).move_to_element(clickelmt).click().perform()
|
|
|
+ print('clicked....')
|
|
|
+ time.sleep(5)
|
|
|
+
|
|
|
+ if neg_count == 0:
|
|
|
+ negstr = 0
|
|
|
+ else:
|
|
|
+ negstr = neg_total / neg_count
|
|
|
+ print(negstr)
|
|
|
+ if negstr > 0 and negstr < 21:
|
|
|
+ print('警示字')
|
|
|
+ msg_1 = '警示字:' + kw
|
|
|
+ msg_2 = jsobj['delete_kw_count']
|
|
|
+ send_msg(msg_1 + "\n" + str(msg_2))
|
|
|
+ seo.delete(kw=kw, domain=domain)
|
|
|
+ delete_kw.insert({'kw': kw, 'domain': domain, 'cust': jsobj['client'],'dt':datetime.datetime.now()})
|
|
|
+ table.insert(
|
|
|
+ {'ranking': clickidx, 'kw': kw, 'results': numresults, 'url': domain, 'title': clicktitle, 'avg_neg': negstr,
|
|
|
+ 'dt': datetime.datetime.now()})
|
|
|
|
|
|
except:
|
|
|
traceback.print_exc()
|
|
|
-
|
|
|
print('exception')
|
|
|
traceback.print_exc()
|
|
|
- db.close()
|
|
|
- driver.quit()
|
|
|
+ # db.close()
|
|
|
|
|
|
-cursor = db.query("select cust, json from public.seo_job where cust='信義房屋' order by random() limit 1")
|
|
|
-cursor_n = db.query("select * from public.neg_word where client='信義房屋'")
|
|
|
+ driver.quit()
|
|
|
|
|
|
-for c in cursor:
|
|
|
- js_string = c['json']
|
|
|
- js = json.loads(js_string)
|
|
|
- prefix=js['prefix']
|
|
|
- postfix=js['postfix']
|
|
|
- domain=js['domain'][0]
|
|
|
- positive=js['positive']
|
|
|
- rnd=js['rnd']
|
|
|
|
|
|
-kw1=random.choice(positive)
|
|
|
-kw2=random.choice(rnd)
|
|
|
-# kw=kw1+" "+prefix+" "+kw2
|
|
|
-kw = prefix + " " + kw1
|
|
|
-for c in cursor_n:
|
|
|
- neg_word = c['neg_word']
|
|
|
|
|
|
|
|
|
# Polling loop: on every pass pick one random keyword job for the client,
# gather today's deletion counts and the client's negative-word list, run a
# single ranking check, then wait before the next pass.
while True:
    # NOTE(review): earlier variants of this query selected rows with
    # `type is NULL` or a fixed id; presumably non-video jobs / debugging.
    cursor = db.query("select * from public.seo where cust='信義房屋' and type='vi' order by random() limit 1")

    # Drain the cursor fully; the query is limited to one row, so `job`
    # ends up as that row or stays None when nothing matched.
    job = None
    for row in cursor:
        job = row

    if job is None:
        # Previously this case raised NameError (first pass) or silently
        # re-ran the previous keyword (later passes).
        print('no seo job row found, skipping this pass')
    else:
        kw = job['kw']
        domain = job['domain']

        cursor_n = db.query("select * from public.neg_word where client='信義房屋'")
        cursor_d = db.query("select * from public.delete_kw where now()::date = dt::date")

        # Per-client count of keywords deleted today; only the clients
        # listed here are tallied.
        d = {'啟翔':0,'加百裕':0,'富玉':0,'信義房屋':0,'真理大學':0}
        for row in cursor_d:
            if row['cust'] in d:
                d[row['cust']] += 1
        print(d)

        # run_once() parses this value with ast.literal_eval, so the
        # fallback for "no negative words configured" is an empty-list literal.
        neg_word = '[]'
        for row in cursor_n:
            neg_word = row['neg_word']

        run_once({'domain':domain,'kw':kw,'client':'信義房屋','neg_word':neg_word,'delete_kw_count':d})

    print('等待下次執行')
    time.sleep(80)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
-
|