# click_negative_vi.py
  1. # import redis
  2. import time
  3. import traceback
  4. # import json
  5. from selenium import webdriver
  6. from selenium.webdriver.chrome.service import Service
  7. import undetected_chromedriver as uc
  8. from selenium.webdriver.common.desired_capabilities import DesiredCapabilities
  9. import time
  10. # import urllib
  11. import os
  12. from selenium.webdriver.support.ui import WebDriverWait
  13. from selenium.webdriver.common.by import By
  14. from selenium.webdriver.support import expected_conditions as EC
  15. import dataset
  16. from selenium.webdriver.common.keys import Keys
  17. import json
  18. import random
  19. import time
  20. # import redis
  21. import sys
  22. import codecs
  23. import random
  24. import datetime
  25. import os
  26. import time
  27. import requests
  28. import urllib.parse
  29. import ast
  30. import pygetwindow as gw
  31. driver = None
  32. db = dataset.connect('postgresql://postgres:eyJhbGciOiJI@172.105.241.163:5432/postgres')
  33. # db = dataset.connect('mysql://choozmo:pAssw0rd@db.ptt.cx:3306/seo?charset=utf8mb4')
  34. # headers = {
  35. # "Authorization": "Bearer " + "ygIurYIfWgHj6HrQjOnVGh4rjXajZkeHuBYe12v8nTN",
  36. # "Content-Type": "application/x-www-form-urlencoded"
  37. headers = {
  38. "Authorization": "Bearer " + "OZDcq7sVKwr3F6YNLtBF3LuIgpa4Ql9eAnBWeD7sHTJ",
  39. "Content-Type": "application/x-www-form-urlencoded"
  40. }
  41. def send_msg(kw):
  42. params = {"message": kw}
  43. print('通知結果', params)
  44. r = requests.post("https://notify-api.line.me/api/notify", headers=headers, params=params)
  45. def re_get_webdriver():
  46. global port
  47. global driver
  48. global portnum
  49. global is_docker
  50. result = []
  51. if driver is not None:
  52. print('closing....')
  53. driver.quit()
  54. print('quit....')
  55. driver = None
  56. try:
  57. options = uc.ChromeOptions()
  58. # options.add_argument("--user-agent=" + "Mozilla/5.0 (Linux; Android 4.2.1; en-us; Nexus 5 Build/JOP40D) AppleWebKit/535.19 (KHTML, like Gecko) Chrome/18.0.1025.166 Mobile Safari/535.19")
  59. options.add_argument("--window-size=200,100") # 縮小視窗
  60. options.add_argument("--window-position=-32000,-32000") # 移到螢幕外
  61. # for window in gw.getWindowsWithTitle("Chrome"):
  62. # window.minimize()
  63. driver = uc.Chrome(options=options)
  64. driver.delete_all_cookies()
  65. except:
  66. traceback.print_exc()
  67. driver = None
  68. return None
  69. return driver
  70. def run_once(jsobj):
  71. table = db['seo_jobs_ranking']
  72. history = db['seo_search_history']
  73. nda_log = db['nda_log']
  74. delete_kw = db['delete_kw']
  75. seo = db['seo']
  76. print(jsobj)
  77. neg_word = ast.literal_eval(jsobj['neg_word'])
  78. print('這裏',neg_word)
  79. i = 100
  80. while True:
  81. driver = re_get_webdriver()
  82. print('re_get_webdriver')
  83. if driver is not None:
  84. break
  85. time.sleep(3)
  86. try:
  87. kw = jsobj['kw']
  88. domain = jsobj['domain']
  89. # googleurl = 'https://www.google.com/search?q={}&num={}&hl={}&gl=tw'.format(urllib.parse.quote(kw), 100, 'zh-TW')
  90. # googleurl = 'https://www.google.com/search?q={}&num={}&hl={}&gl=tw&tbm=vid&tbs=vd:m'.format(urllib.parse.quote(kw), 100, 'zh-TW')
  91. googleurl = f'https://www.google.co.jp/search?q={kw}&sca_esv=741dc4f98c90c9c4&source=hp&ei=djmOZ8inMYWk2roPk_yMiA4&iflsig=AL9hbdgAAAAAZ45HhiuBAUgi3Vf3Qd5FTyfcyUOySOxk&ved=0ahUKEwjIutTinoSLAxUFklYBHRM-A-EQ4dUDCA8&uact=5&oq=junho&gs_lp=Egdnd3Mtd2l6IgphbmdlbG8ga29vMgUQLhiABDIEEAAYHjIEEAAYHjIEEAAYHjIEEAAYHjIEEAAYHjIEEAAYHjIEEAAYHjIGEAAYChgeSL0YUABYqRZwAXgAkAEAmAGwAaABjQyqAQQwLjExuAEDyAEA-AEBmAIMoALYDMICCxAuGIAEGNEDGMcBwgIFEAAYgATCAgoQLhiABBhDGIoFwgILEC4YgAQYxwEYrwHCAgcQABiABBgKwgIHEC4YgAQYCsICDRAuGIAEGMcBGAoYrwGYAwCSBwQxLjExoAfBqQE&sclient=gws-wiz'
  92. driver.get(googleurl)
  93. time.sleep(6)
  94. print(driver.current_url)
  95. # elmt = driver.find_element(By.XPATH, "//input[@name='q']")
  96. # time.sleep(1)
  97. # elmt.send_keys(kw)
  98. # elmt.send_keys(Keys.ENTER)
  99. # time.sleep(6)
  100. elmts = driver.find_elements(By.XPATH, "//div[@class='yuRUbf']//a")
  101. # elmts = driver.find_elements(By.XPATH, "//div[@class='xe8e1b']//a")
  102. numresults = len(elmts)
  103. print('搜尋結果數量', numresults)
  104. if numresults == 0:
  105. print(driver.current_url)
  106. print(driver.title)
  107. sys.exit()
  108. # time.sleep(9999)
  109. idx = 1
  110. found = 0
  111. test_lst = []
  112. clickelmt = None
  113. neg_count = 0
  114. neg_total = 0
  115. clickidx = 0
  116. clickhref = ''
  117. clicktitle = ''
  118. for elmt in elmts:
  119. href = elmt.get_attribute('href')
  120. # print(href)
  121. txt = elmt.text
  122. history.insert({'ranking': idx, 'kw': kw, 'results': numresults, 'url': href, 'title': txt,'dt':datetime.datetime.now()})
  123. # if '坑殺' in txt or '侵占' in txt or '判決書' in txt or '強佔' in txt or '掏空' in txt or '送達公告' in txt or '違反勞動'in txt:
  124. # neg_count+=1
  125. # neg_total+=idx
  126. # print('分數',neg_total, neg_count)
  127. for i in neg_word:
  128. if i in txt:
  129. neg_count += 1
  130. neg_total += idx
  131. break
  132. if domain in href:
  133. print('found....')
  134. print(href)
  135. print(txt)
  136. print("ranking", idx)
  137. found = True
  138. clickelmt = elmt
  139. clickidx = idx
  140. clickhref = href
  141. clicktitle = txt
  142. found = 1
  143. else:
  144. if found == 1:
  145. not_found = 0
  146. else:
  147. not_found = 1
  148. idx += 1
  149. if not_found == 1:
  150. print('未收錄')
  151. nda_log.insert({'ranking': -1, 'kw': kw, 'results': numresults, 'url': href, 'title': '未收錄','dt': datetime.datetime.now(), 'client': jsobj['client']})
  152. seo.delete(kw=kw, domain=domain)
  153. delete_kw.insert({'kw':kw,'domain':domain,'cust':jsobj['client'], 'dt':datetime.datetime.now()})
  154. msg_1 = '未收錄:'+kw+' '+domain
  155. msg_2 = jsobj['delete_kw_count']
  156. # send_msg(msg_1 + "\n" + str(msg_2))
  157. else:
  158. nda_log.insert({'ranking': clickidx, 'kw': kw, 'results': numresults, 'url': clickhref, 'title': clicktitle,'dt': datetime.datetime.now(), 'client': jsobj['client'], 'type':''})
  159. webdriver.ActionChains(driver).move_to_element(clickelmt).perform()
  160. webdriver.ActionChains(driver).move_to_element(clickelmt).click().perform()
  161. print('clicked....')
  162. time.sleep(5)
  163. print('分數', neg_total, neg_count)
  164. if neg_count == 0:
  165. negstr = 0
  166. else:
  167. negstr = neg_total / neg_count
  168. print(negstr)
  169. if negstr > 0 and negstr < 21:
  170. print('警示字')
  171. msg_1 = '警示字:' + kw
  172. msg_2 = jsobj['delete_kw_count']
  173. # send_msg(msg_1 + "\n" + str(msg_2))
  174. seo.delete(kw=kw, domain=domain)
  175. delete_kw.insert({'kw': kw, 'domain': domain, 'cust': jsobj['client'],'dt':datetime.datetime.now()})
  176. # table.insert(
  177. # {'ranking': clickidx, 'kw': kw, 'results': numresults, 'url': domain, 'title': clicktitle, 'avg_neg': negstr,
  178. # 'dt': datetime.datetime.now()})
  179. except:
  180. traceback.print_exc()
  181. print('exception')
  182. traceback.print_exc()
  183. # db.close()
  184. driver.quit()
  185. while True:
  186. # cursor = db.query("select * from public.seo where cust='' and type='vi' order by random() limit 1")
  187. cursor = db.query("select * from public.seo where cust='美東' order by random() limit 1")
  188. # cursor = db.query("select * from public.seo where id=627")
  189. cursor_n = db.query("select * from public.neg_word where client='美東'")
  190. cursor_d = db.query("select * from public.delete_kw where now()::date = dt::date")
  191. for c in cursor:
  192. kw = c['kw']
  193. domain = c['domain']
  194. d = {'美東':0,'百威':0}
  195. for c in cursor_d:
  196. if c['cust'] in d.keys():
  197. d[c['cust']]+=1
  198. print(d)
  199. for c in cursor_n:
  200. neg_word = c['neg_word']
  201. run_once({'domain':domain,'kw':kw,'client':'美東','neg_word':neg_word,'delete_kw_count':d})
  202. # run_once({'domain':domain,'kw':kw,'client':'百威','delete_kw_count':d})
  203. # db.close()
  204. print('等待下次執行')
  205. time.sleep(60)