gen_seo.py 5.7 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196
  1. #import redis
  2. import time
  3. import traceback
  4. #import json
  5. from selenium.webdriver.common.desired_capabilities import DesiredCapabilities
  6. import time
  7. import os
  8. from selenium.webdriver.support.ui import WebDriverWait
  9. from selenium.webdriver.support import expected_conditions as EC
  10. import dataset
  11. from selenium import webdriver
  12. from selenium.webdriver.common.by import By
  13. from selenium.webdriver.common.keys import Keys
  14. from selenium.webdriver.chrome.service import Service
  15. import json
  16. import random
  17. import time
  18. import datetime
  19. import sys
  20. import codecs
  21. import random
  22. import os
  23. import time
  24. import requests
  25. import pymysql
  26. import urllib.parse
  27. pymysql.install_as_MySQLdb()
  28. driver=None
  29. db = dataset.connect('mysql://choozmo:pAssw0rd@db.ptt.cx:3306/seo?charset=utf8mb4')
  30. headers = {
  31. "Authorization": "Bearer " + "6SDULL1Ebklduc6TFxa97AFto5Sj21kyJ30CxiLiSoi",
  32. "Content-Type": "application/x-www-form-urlencoded"
  33. }
  34. def send_msg(kw):
  35. params = {"message":kw}
  36. r = requests.post("https://notify-api.line.me/api/notify",headers=headers, params=params)
  37. def re_get_webdriver():
  38. global port
  39. global driver
  40. result=[]
  41. if driver is not None:
  42. print('closing....')
  43. driver.quit()
  44. os.system('killall chrome')
  45. print('quit....')
  46. driver=None
  47. try:
  48. options = webdriver.ChromeOptions()
  49. # options.add_argument("user-agent=%s" % user_agent)
  50. # options.add_argument('--headless')
  51. options.add_argument("--incognito")
  52. driver = webdriver.Chrome(options=options)
  53. driver.delete_all_cookies()
  54. driver.set_window_size(1400,1000)
  55. except:
  56. traceback.print_exc()
  57. driver=None
  58. return None
  59. def run_once(jsobj):
  60. table=db['nda_log']
  61. print(jsobj)
  62. global driver
  63. # i=random.randint(0,9)
  64. i=100
  65. if driver is None:
  66. time.sleep(8)
  67. re_get_webdriver()
  68. if driver is None:
  69. return
  70. try:
  71. kw=jsobj['kw']
  72. if jsobj.get('domain') is None:
  73. exclude=jsobj['exclude']
  74. domain=None
  75. else:
  76. domain=jsobj['domain']
  77. exclude=None
  78. googleurl = 'https://www.google.com/search?q={}&num={}&hl={}'.format(urllib.parse.quote(kw), 100, 'zh-TW')
  79. driver.get(googleurl)
  80. time.sleep(6)
  81. print(driver.current_url)
  82. if 'sorry' in driver.current_url:
  83. print("URL Error: Caught")
  84. return
  85. # elmt = driver.find_element(By.XPATH, "//input[@name='q']")
  86. # time.sleep(1)
  87. # elmt.send_keys(kw)
  88. # elmt.send_keys(Keys.ENTER)
  89. # time.sleep(6)
  90. elmts = driver.find_elements(By.XPATH, "//div[@class='yuRUbf']//a")
  91. numresults=len(elmts)
  92. print('搜尋結果數量',numresults)
  93. if numresults==0:
  94. send_msg('stop working...')
  95. sys.exit()
  96. idx=1
  97. found=False
  98. test_lst=[]
  99. txt_dict={}
  100. for elmt in elmts:
  101. href=elmt.get_attribute('href')
  102. txt=elmt.text
  103. if len(txt)>10:
  104. if domain is not None:
  105. if domain in href:
  106. print('found....')
  107. print('clicked....')
  108. print(href)
  109. print("ranking", idx)
  110. found=True
  111. webdriver.ActionChains(driver).move_to_element(elmt).perform()
  112. # elmt.click()
  113. webdriver.ActionChains(driver).move_to_element(elmt).click().perform()
  114. table.insert({'ranking':idx,'kw':kw,'results':numresults,'url':href,'title':txt,'dt':datetime.datetime.now(),'client':jsobj['cust']})
  115. time.sleep(5)
  116. page_height = driver.execute_script("return document.body.scrollHeight")
  117. scroll_step = page_height // 4
  118. current_height = 0
  119. while current_height < page_height:
  120. driver.execute_script(f"window.scrollTo(0, {current_height + scroll_step});")
  121. time.sleep(3)
  122. current_height += scroll_step
  123. time.sleep(10)
  124. db.close()
  125. break
  126. else:
  127. ex=False
  128. for ee in exclude:
  129. if ee in href:
  130. ex=True
  131. if not ex:
  132. test_lst.append(elmt)
  133. txt_dict[elmt]=txt
  134. idx+=1
  135. if exclude is not None:
  136. print('exclude')
  137. elmt=random.choice(test_lst[5:])
  138. print(elmt)
  139. print(txt_dict[elmt])
  140. webdriver.ActionChains(driver).move_to_element(elmt).perform()
  141. elmt.click()
  142. # webdriver.ActionChains(driver).move_to_element(elmt).click().perform()
  143. time.sleep(5)
  144. if not found:
  145. table.insert({'ranking':-1,'kw':kw,'results':numresults,'url':'','title':'未收錄','client':jsobj['cust']})
  146. except:
  147. print('exception')
  148. traceback.print_exc()
  149. driver.quit()
  150. # sys.exit()
  151. while True:
  152. cursor=db.query('select json from seo_jobs where cust="啟翔" and plan="形象SEO" and json like "%陳百欽%" order by rand() limit 1')
  153. for c in cursor:
  154. js=json.loads(c['json'])
  155. prefix=js['prefix']
  156. postfix=js['postfix']
  157. domain=js['domain'][0]
  158. positive=js['positive']
  159. rnd=js['rnd']
  160. kw1=random.choice(positive)
  161. kw2=random.choice(rnd)
  162. kw=kw1+" "+prefix+" "+kw2
  163. code='03'
  164. run_once({'domain':domain,'kw':kw, 'cust':'啟翔'})
  165. time.sleep(61)