# click_negative.py
  1. # import redis
  2. import time
  3. import traceback
  4. # import json
  5. from selenium import webdriver
  6. from selenium.webdriver.chrome.service import Service
  7. from selenium.webdriver.common.desired_capabilities import DesiredCapabilities
  8. import time
  9. # import urllib
  10. import os
  11. from selenium.webdriver.support.ui import WebDriverWait
  12. from selenium.webdriver.common.by import By
  13. from selenium.webdriver.support import expected_conditions as EC
  14. import dataset
  15. from selenium.webdriver.common.keys import Keys
  16. import json
  17. import random
  18. import time
  19. # import redis
  20. import sys
  21. import codecs
  22. import random
  23. import datetime
  24. import os
  25. import time
  26. import requests
  27. import urllib.parse
  28. import ast
  29. driver = None
  30. db = dataset.connect('postgresql://postgres:eyJhbGciOiJI@172.105.241.163:5432/postgres')
  31. # db = dataset.connect('mysql://choozmo:pAssw0rd@db.ptt.cx:3306/seo?charset=utf8mb4')
  32. # headers = {
  33. # "Authorization": "Bearer " + "ygIurYIfWgHj6HrQjOnVGh4rjXajZkeHuBYe12v8nTN",
  34. # "Content-Type": "application/x-www-form-urlencoded"
  35. headers = {
  36. "Authorization": "Bearer " + "OZDcq7sVKwr3F6YNLtBF3LuIgpa4Ql9eAnBWeD7sHTJ",
  37. "Content-Type": "application/x-www-form-urlencoded"
  38. }
  39. def send_msg(kw):
  40. params = {"message": kw}
  41. print('通知結果', params)
  42. r = requests.post("https://notify-api.line.me/api/notify", headers=headers, params=params)
  43. def re_get_webdriver():
  44. global port
  45. global driver
  46. global portnum
  47. global is_docker
  48. result = []
  49. if driver is not None:
  50. print('closing....')
  51. driver.quit()
  52. print('quit....')
  53. driver = None
  54. try:
  55. s = Service('/Users/mac/Downloads/127/chromedriver')
  56. options = webdriver.ChromeOptions()
  57. options.add_argument('--headless')
  58. # options.add_argument("--user-agent=" + "Mozilla/5.0 (Linux; Android 4.2.1; en-us; Nexus 5 Build/JOP40D) AppleWebKit/535.19 (KHTML, like Gecko) Chrome/18.0.1025.166 Mobile Safari/535.19")
  59. options.add_argument("--incognito")
  60. driver = webdriver.Chrome(
  61. options=options, service=s)
  62. driver.delete_all_cookies()
  63. driver.set_window_size(1400, 1000)
  64. except:
  65. traceback.print_exc()
  66. driver = None
  67. return None
  68. return driver
  69. def run_once(jsobj):
  70. table = db['seo_jobs_ranking']
  71. history = db['seo_search_history']
  72. nda_log = db['nda_log']
  73. delete_kw = db['delete_kw']
  74. seo = db['seo']
  75. print(jsobj)
  76. neg_word = ast.literal_eval(jsobj['neg_word'])
  77. print('這裏',neg_word)
  78. i = 100
  79. while True:
  80. driver = re_get_webdriver()
  81. print('re_get_webdriver')
  82. if driver is not None:
  83. break
  84. time.sleep(3)
  85. try:
  86. kw = jsobj['kw']
  87. domain = jsobj['domain']
  88. # googleurl = 'https://www.google.com/search?q={}&num={}&hl={}&gl=tw'.format(urllib.parse.quote(kw), 100, 'zh-TW')
  89. googleurl = 'https://www.google.com/search?q={}&num={}&hl={}&gl=tw&tbm=vid&tbs=vd:m'.format(urllib.parse.quote(kw), 100, 'zh-TW')
  90. driver.get(googleurl)
  91. time.sleep(6)
  92. print(driver.current_url)
  93. # elmt = driver.find_element(By.XPATH, "//input[@name='q']")
  94. # time.sleep(1)
  95. # elmt.send_keys(kw)
  96. # elmt.send_keys(Keys.ENTER)
  97. # time.sleep(6)
  98. # elmts = driver.find_elements(By.XPATH, "//div[@class='yuRUbf']//a")
  99. elmts = driver.find_elements(By.XPATH, "//div[@class='xe8e1b']//a")
  100. numresults = len(elmts)
  101. print('搜尋結果數量', numresults)
  102. if numresults == 0:
  103. print(driver.current_url)
  104. print(driver.title)
  105. sys.exit()
  106. # time.sleep(9999)
  107. idx = 1
  108. found = 0
  109. test_lst = []
  110. clickelmt = None
  111. neg_count = 0
  112. neg_total = 0
  113. clickidx = 0
  114. clickhref = ''
  115. clicktitle = ''
  116. for elmt in elmts:
  117. href = elmt.get_attribute('href')
  118. # print(href)
  119. txt = elmt.text
  120. history.insert({'ranking': idx, 'kw': kw, 'results': numresults, 'url': href, 'title': txt,'dt':datetime.datetime.now()})
  121. # if '坑殺' in txt or '侵占' in txt or '判決書' in txt or '強佔' in txt or '掏空' in txt or '送達公告' in txt or '違反勞動'in txt:
  122. # neg_count+=1
  123. # neg_total+=idx
  124. # print('分數',neg_total, neg_count)
  125. for i in neg_word:
  126. if i in txt:
  127. neg_count += 1
  128. neg_total += idx
  129. break
  130. # print('分數',neg_total, neg_count)
  131. if domain in href:
  132. print('found....')
  133. print(href)
  134. print(txt)
  135. print("ranking", idx)
  136. found = True
  137. clickelmt = elmt
  138. clickidx = idx
  139. clickhref = href
  140. clicktitle = txt
  141. found = 1
  142. else:
  143. if found == 1:
  144. not_found = 0
  145. else:
  146. not_found = 1
  147. idx += 1
  148. if not_found == 1:
  149. print('未收錄')
  150. nda_log.insert({'ranking': -1, 'kw': kw, 'results': numresults, 'url': href, 'title': '未收錄','dt': datetime.datetime.now(), 'client': jsobj['client']})
  151. seo.delete(kw=kw, domain=domain)
  152. delete_kw.insert({'kw':kw,'domain':domain,'cust':jsobj['client'], 'dt':datetime.datetime.now()})
  153. msg_1 = '未收錄:'+kw+' '+domain
  154. msg_2 = jsobj['delete_kw_count']
  155. send_msg(msg_1 + "\n" + str(msg_2))
  156. else:
  157. nda_log.insert({'ranking': clickidx, 'kw': kw, 'results': numresults, 'url': clickhref, 'title': clicktitle,'dt': datetime.datetime.now(), 'client': jsobj['client'], 'type':'vi'})
  158. webdriver.ActionChains(driver).move_to_element(clickelmt).perform()
  159. webdriver.ActionChains(driver).move_to_element(clickelmt).click().perform()
  160. print('clicked....')
  161. time.sleep(5)
  162. if neg_count == 0:
  163. negstr = 0
  164. else:
  165. negstr = neg_total / neg_count
  166. print(negstr)
  167. if negstr > 0 and negstr < 21:
  168. print('警示字')
  169. msg_1 = '警示字:' + kw
  170. msg_2 = jsobj['delete_kw_count']
  171. send_msg(msg_1 + "\n" + str(msg_2))
  172. seo.delete(kw=kw, domain=domain)
  173. delete_kw.insert({'kw': kw, 'domain': domain, 'cust': jsobj['client'],'dt':datetime.datetime.now()})
  174. table.insert(
  175. {'ranking': clickidx, 'kw': kw, 'results': numresults, 'url': domain, 'title': clicktitle, 'avg_neg': negstr,
  176. 'dt': datetime.datetime.now()})
  177. except:
  178. traceback.print_exc()
  179. print('exception')
  180. traceback.print_exc()
  181. # db.close()
  182. driver.quit()
  183. while True:
  184. # cursor = db.query("select * from public.seo where cust='百威' and type is NULL order by random() limit 1")
  185. cursor = db.query("select * from public.seo where cust='信義房屋' and type='vi' order by random() limit 1")
  186. # cursor = db.query("select * from public.seo where id=627")
  187. cursor_n = db.query("select * from public.neg_word where client='信義房屋'")
  188. cursor_d = db.query("select * from public.delete_kw where now()::date = dt::date")
  189. for c in cursor:
  190. kw = c['kw']
  191. domain = c['domain']
  192. d = {'信義房屋':0,'真理大學':0,'驊揚':0,'百威':0}
  193. for c in cursor_d:
  194. if c['cust'] in d.keys():
  195. d[c['cust']]+=1
  196. print(d)
  197. for c in cursor_n:
  198. neg_word = c['neg_word']
  199. run_once({'domain':domain,'kw':kw,'client':'信義房屋','neg_word':neg_word,'delete_kw_count':d})
  200. # db.close()
  201. print('等待下次執行')
  202. time.sleep(80)