# click_positive.py
  1. # import redis
  2. import time
  3. import traceback
  4. # import json
  5. from selenium import webdriver
  6. from selenium.webdriver.chrome.service import Service
  7. import undetected_chromedriver as uc
  8. from selenium.webdriver.common.desired_capabilities import DesiredCapabilities
  9. import time
  10. # import urllib
  11. import os
  12. from selenium.webdriver.support.ui import WebDriverWait
  13. from selenium.webdriver.common.by import By
  14. from selenium.webdriver.support import expected_conditions as EC
  15. import dataset
  16. from selenium.webdriver.common.keys import Keys
  17. import json
  18. import random
  19. import time
  20. # import redis
  21. import sys
  22. import codecs
  23. import random
  24. import datetime
  25. import os
  26. import time
  27. import requests
  28. import urllib.parse
  29. import ast
  30. driver = None
  31. db = dataset.connect('postgresql://postgres:eyJhbGciOiJI@172.105.241.163:5432/postgres')
  32. # db = dataset.connect('mysql://choozmo:pAssw0rd@db.ptt.cx:3306/seo?charset=utf8mb4')
  33. # headers = {
  34. # "Authorization": "Bearer " + "ygIurYIfWgHj6HrQjOnVGh4rjXajZkeHuBYe12v8nTN",
  35. # "Content-Type": "application/x-www-form-urlencoded"
  36. headers = {
  37. "Authorization": "Bearer " + "OZDcq7sVKwr3F6YNLtBF3LuIgpa4Ql9eAnBWeD7sHTJ",
  38. "Content-Type": "application/x-www-form-urlencoded"
  39. }
  40. def send_msg(kw):
  41. params = {"message": kw}
  42. print('通知結果', params)
  43. r = requests.post("https://notify-api.line.me/api/notify", headers=headers, params=params)
  44. def re_get_webdriver():
  45. global port
  46. global driver
  47. global portnum
  48. global is_docker
  49. result = []
  50. if driver is not None:
  51. print('closing....')
  52. driver.quit()
  53. print('quit....')
  54. driver = None
  55. try:
  56. s = Service('C:\/Users\/s1301\/Downloads\/136\/chromedriver-win32\/chromedriver.exe')
  57. options = uc.ChromeOptions()
  58. # options.add_argument('--headless')
  59. options.add_argument("--window-size=200,100") # 縮小視窗
  60. options.add_argument("--window-position=-32000,-32000") # 移到螢幕外
  61. # driver = webdriver.Chrome(options=options, service=s)
  62. driver = uc.Chrome(options=options)
  63. except:
  64. traceback.print_exc()
  65. driver = None
  66. return None
  67. return driver
  68. def run_once(jsobj):
  69. table = db['seo_jobs_ranking']
  70. history = db['seo_search_history']
  71. nda_log = db['nda_log']
  72. delete_kw = db['delete_kw']
  73. seo = db['seo']
  74. print(jsobj)
  75. i = 100
  76. while True:
  77. driver = re_get_webdriver()
  78. print('re_get_webdriver')
  79. if driver is not None:
  80. break
  81. time.sleep(3)
  82. try:
  83. kw = jsobj['kw']
  84. domain = jsobj['domain']
  85. googleurl = 'https://www.google.com/search?q={}&num={}&hl={}&gl=tw'.format(urllib.parse.quote(kw), 20, 'zh-TW')
  86. # googleurl = 'https://www.google.com/search?q={}&num={}&hl={}&gl=tw&tbm=vid&tbs=vd:m'.format(urllib.parse.quote(kw), 100, 'zh-TW')
  87. driver.get(googleurl)
  88. time.sleep(6)
  89. print(driver.current_url)
  90. # elmt = driver.find_element(By.XPATH, "//input[@name='q']")
  91. # time.sleep(1)
  92. # elmt.send_keys(kw)
  93. # elmt.send_keys(Keys.ENTER)
  94. # time.sleep(10)
  95. elmts = driver.find_elements(By.XPATH, "//div[@class='yuRUbf']//a")
  96. # elmts = driver.find_elements(By.XPATH, "//div[@class='xe8e1b']//a")
  97. numresults = len(elmts)
  98. print('搜尋結果數量', numresults)
  99. if numresults == 0:
  100. print(driver.current_url)
  101. print(driver.title)
  102. sys.exit()
  103. # time.sleep(9999)
  104. idx = 1
  105. found = 0
  106. test_lst = []
  107. clickelmt = None
  108. neg_count = 0
  109. neg_total = 0
  110. clickidx = 0
  111. clickhref = ''
  112. clicktitle = ''
  113. for elmt in elmts:
  114. href = elmt.get_attribute('href')
  115. # print(href)
  116. txt = elmt.text
  117. history.insert({'ranking': idx, 'kw': kw, 'results': numresults, 'url': href, 'title': txt,'dt':datetime.datetime.now()})
  118. if domain in href:
  119. print('found....')
  120. print(href)
  121. print(txt)
  122. print("ranking", idx)
  123. found = True
  124. clickelmt = elmt
  125. clickidx = idx
  126. clickhref = href
  127. clicktitle = txt
  128. found = 1
  129. else:
  130. if found == 1:
  131. not_found = 0
  132. else:
  133. not_found = 1
  134. idx += 1
  135. if not_found == 1:
  136. print('未收錄')
  137. nda_log.insert({'ranking': -1, 'kw': kw, 'results': numresults, 'url': href, 'title': '未收錄','dt': datetime.datetime.now(), 'client': jsobj['client']})
  138. seo.delete(kw=kw, domain=domain)
  139. delete_kw.insert({'kw':kw,'domain':domain,'cust':jsobj['client'], 'dt':datetime.datetime.now()})
  140. msg_1 = '未收錄:'+kw+' '+domain
  141. msg_2 = jsobj['delete_kw_count']
  142. send_msg(msg_1 + "\n" + str(msg_2))
  143. else:
  144. nda_log.insert({'ranking': clickidx, 'kw': kw, 'results': numresults, 'url': clickhref, 'title': clicktitle,'dt': datetime.datetime.now(), 'client': jsobj['client'], 'type':''})
  145. webdriver.ActionChains(driver).move_to_element(clickelmt).perform()
  146. webdriver.ActionChains(driver).move_to_element(clickelmt).click().perform()
  147. print('clicked....')
  148. time.sleep(15)
  149. except:
  150. traceback.print_exc()
  151. print('exception')
  152. traceback.print_exc()
  153. # db.close()
  154. driver.quit()
  155. while True:
  156. kw_dict = {'邱德馨 linkedin': 'linkedin.com', '邱德馨 facebook': "facebook.com", '邱德馨 樹脂市場': "vocus.cc",
  157. '邱德馨 減碳': "ctwant.com", '邱德馨 關懷員工': 'news.aimedium.org', '邱德馨 國喬': 'vocus.cc'}
  158. kw_ = random.choice(list(kw_dict.items()))
  159. kw = kw_[0]
  160. domain = kw_[1]
  161. print(kw,domain)
  162. run_once({'domain':domain,'kw':kw,'client':'HHH'})
  163. # db.close()
  164. print('等待下次執行')
  165. time.sleep(90)