# gen_seo.py
import codecs
import json
import os
import random
import signal
import sys
import time
import traceback

import dataset
import redis
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.common.desired_capabilities import DesiredCapabilities
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait
from userAgentRandomizer import userAgents
  24. driver=None
  25. db = dataset.connect('mysql://choozmo:pAssw0rd@db.ptt.cx:3306/seo?charset=utf8mb4')
  26. def re_get_webdriver():
  27. global driver
  28. result=[]
  29. if driver is not None:
  30. print('closing....')
  31. driver.quit()
  32. os.system('killall chrome')
  33. print('quit....')
  34. driver=None
  35. try:
  36. ua = userAgents()
  37. user_agent = ua.random()
  38. options = webdriver.ChromeOptions()
  39. options.add_argument("--no-sandbox")
  40. options.add_argument("--disable-dev-shm-usage")
  41. options.add_argument("--headless")
  42. print(user_agent)
  43. # options.add_argument("--user-agent=" +user_agent)
  44. options.add_argument("--incognito")
  45. driver=None
  46. try:
  47. driver = webdriver.Chrome(options=options)
  48. except:
  49. # driver.quit()
  50. # os.system('pkill -f ')
  51. os.system('kill %d' % os.getpid())
  52. sys.exit()
  53. return
  54. driver.set_window_size(1400,1000)
  55. return
  56. except:
  57. import traceback
  58. traceback.print_exc()
  59. driver=None
  60. return None
  61. def from_shopping(kw):
  62. global driver
  63. driver.get('https://shopping.google.com')
  64. time.sleep(5)
  65. elmt = driver.find_element(By.XPATH, "//input[@id='REsRA']")
  66. elmt.send_keys('幸福空間')
  67. elmt.send_keys(Keys.ENTER) #hits space
  68. time.sleep(7)
  69. elmt = driver.find_element(By.XPATH, "//div[@class='hdtb-mitem']/a[contains(text(),'全部') or contains(text(),'All')]")
  70. webdriver.ActionChains(driver).move_to_element(elmt).perform()
  71. webdriver.ActionChains(driver).move_to_element(elmt).click().perform()
  72. time.sleep(5)
  73. def from_book(kw):
  74. global driver
  75. driver.get('https://books.google.com/')
  76. time.sleep(5)
  77. elmt = driver.find_element(By.XPATH, "//input[@id='oc-search-input']")
  78. elmt.send_keys('幸福空間')
  79. elmt.send_keys(Keys.ENTER) #hits space
  80. time.sleep(7)
  81. elmt = driver.find_element(By.XPATH, "//div[@class='hdtb-mitem']/a[contains(text(),'全部') or contains(text(),'All')]")
  82. webdriver.ActionChains(driver).move_to_element(elmt).perform()
  83. webdriver.ActionChains(driver).move_to_element(elmt).click().perform()
  84. time.sleep(5)
  85. def from_wiki(kw):
  86. global driver
  87. driver.get('https://en.wikipedia.org/wiki/Google_Search')
  88. time.sleep(4)
  89. elmt = driver.find_element(By.XPATH, "//a[contains(@href,'https://www.google.com/')]")
  90. webdriver.ActionChains(driver).move_to_element(elmt).perform()
  91. webdriver.ActionChains(driver).move_to_element(elmt).click().perform()
  92. time.sleep(5)
  93. elmt = driver.find_element(By.XPATH, "//input[@class='gLFyf gsfi']")
  94. time.sleep(1)
  95. elmt.send_keys(kw)
  96. elmt.send_keys(Keys.ENTER)
  97. time.sleep(6)
  98. def from_bing(kw):
  99. global driver
  100. driver.get('https://www.bing.com/search?q=google')
  101. time.sleep(4)
  102. elmt = driver.find_element(By.XPATH, "//a[contains(@href,'https://www.google.com/')]")
  103. webdriver.ActionChains(driver).move_to_element(elmt).perform()
  104. webdriver.ActionChains(driver).move_to_element(elmt).click().perform()
  105. time.sleep(5)
  106. elmt = driver.find_element(By.XPATH, "//input[@class='gLFyf gsfi']")
  107. time.sleep(1)
  108. elmt.send_keys(kw)
  109. elmt.send_keys(Keys.ENTER)
  110. time.sleep(6)
  111. def from_ecosia(kw):
  112. global driver
  113. driver.get('https://www.ecosia.org/search?method=index&q=GOOGLE')
  114. time.sleep(4)
  115. elmt = driver.find_element(By.XPATH, "//a[contains(@href,'https://www.google.com/')]")
  116. webdriver.ActionChains(driver).move_to_element(elmt).perform()
  117. webdriver.ActionChains(driver).move_to_element(elmt).click().perform()
  118. time.sleep(5)
  119. elmt = driver.find_element(By.XPATH, "//input[@class='gLFyf gsfi']")
  120. time.sleep(1)
  121. elmt.send_keys(kw)
  122. elmt.send_keys(Keys.ENTER)
  123. time.sleep(6)
  124. def from_brave(kw):
  125. global driver
  126. driver.get('https://search.brave.com/search?q=google&source=web')
  127. time.sleep(4)
  128. elmt = driver.find_element(By.XPATH, "//a[contains(@href,'https://www.google.com/')]")
  129. webdriver.ActionChains(driver).move_to_element(elmt).perform()
  130. webdriver.ActionChains(driver).move_to_element(elmt).click().perform()
  131. time.sleep(5)
  132. elmt = driver.find_element(By.XPATH, "//input[@class='gLFyf gsfi']")
  133. time.sleep(1)
  134. elmt.send_keys(kw)
  135. elmt.send_keys(Keys.ENTER)
  136. time.sleep(6)
  137. def from_duckduckgo(kw):
  138. global driver
  139. driver.get('https://duckduckgo.com/?q=google')
  140. time.sleep(4)
  141. elmt = driver.find_element(By.XPATH, "//a[contains(@href,'https://www.google.com/')]")
  142. webdriver.ActionChains(driver).move_to_element(elmt).perform()
  143. webdriver.ActionChains(driver).move_to_element(elmt).click().perform()
  144. time.sleep(5)
  145. # time.sleep(9999)
  146. elmt = driver.find_element(By.XPATH, "//input[@class='gLFyf gsfi']")
  147. time.sleep(1)
  148. elmt.send_keys(kw)
  149. elmt.send_keys(Keys.ENTER)
  150. time.sleep(6)
  151. def from_ekoru(kw):
  152. global driver
  153. driver.get('https://www.ekoru.org/?q=google')
  154. time.sleep(4)
  155. elmt = driver.find_element(By.XPATH, "//a[contains(@href,'https://www.google.com/')]")
  156. webdriver.ActionChains(driver).move_to_element(elmt).perform()
  157. webdriver.ActionChains(driver).move_to_element(elmt).click().perform()
  158. time.sleep(5)
  159. # time.sleep(9999)
  160. elmt = driver.find_element(By.XPATH, "//input[@class='gLFyf gsfi']")
  161. time.sleep(1)
  162. elmt.send_keys(kw)
  163. elmt.send_keys(Keys.ENTER)
  164. time.sleep(6)
  165. def from_yahoo(kw):
  166. global driver
  167. driver.get('https://search.yahoo.com/search?p=google')
  168. time.sleep(4)
  169. elmt = driver.find_element(By.XPATH, "//a[contains(@href,'https://www.google.com/')]")
  170. webdriver.ActionChains(driver).move_to_element(elmt).perform()
  171. webdriver.ActionChains(driver).move_to_element(elmt).click().perform()
  172. time.sleep(2)
  173. driver.switch_to.window(driver.window_handles[1])
  174. time.sleep(3)
  175. print(driver.current_url)
  176. elmt = driver.find_element(By.XPATH, "//input[@name='q']")
  177. time.sleep(1)
  178. elmt.send_keys(kw)
  179. elmt.send_keys(Keys.ENTER)
  180. time.sleep(6)
  181. def from_gibiru(kw):
  182. global driver
  183. driver.get('https://gibiru.com/results.html?q=google')
  184. time.sleep(4)
  185. elmt = driver.find_element(By.XPATH, "//a[contains(@href,'https://www.google.com/')]")
  186. webdriver.ActionChains(driver).move_to_element(elmt).perform()
  187. webdriver.ActionChains(driver).move_to_element(elmt).click().perform()
  188. time.sleep(2)
  189. driver.switch_to.window(driver.window_handles[1])
  190. time.sleep(3)
  191. print(driver.current_url)
  192. elmt = driver.find_element(By.XPATH, "//input[@name='q']")
  193. time.sleep(1)
  194. elmt.send_keys(kw)
  195. elmt.send_keys(Keys.ENTER)
  196. time.sleep(6)
  197. def run_once(jsobj):
  198. table=db['rank_detection']
  199. print(jsobj)
  200. global driver
  201. # i=random.randint(0,9)
  202. i=100
  203. if driver is None:
  204. time.sleep(8)
  205. re_get_webdriver()
  206. if driver is None:
  207. return
  208. try:
  209. kw=jsobj['kw']
  210. domain=jsobj['domain']
  211. # kw='leo 娛樂城 484'
  212. # kw='leo 娛樂城 3011'
  213. # domain='tha484.com'
  214. # domain='tha3011.com'
  215. if i==0:
  216. from_book(kw)
  217. elif i==1:
  218. from_shopping(kw)
  219. elif i==2:
  220. from_wiki(kw)
  221. elif i==3:
  222. from_bing(kw)
  223. elif i==4:
  224. from_duckduckgo(kw)
  225. elif i==5:
  226. from_yahoo(kw)
  227. elif i==6:
  228. from_gibiru(kw)
  229. elif i==7:
  230. from_ekoru(kw)
  231. elif i==8:
  232. from_ecosia(kw)
  233. elif i==9:
  234. from_brave(kw)
  235. # time.sleep(9999)
  236. driver.get('https://www.google.com?num=100')
  237. time.sleep(3)
  238. print(driver.current_url)
  239. elmt = driver.find_element(By.XPATH, "//input[@name='q']")
  240. time.sleep(1)
  241. elmt.send_keys(kw)
  242. elmt.send_keys(Keys.ENTER)
  243. time.sleep(6)
  244. elmts = driver.find_elements(By.XPATH, "//div[@class='yuRUbf']/a")
  245. numresults=len(elmts)
  246. print('搜尋結果數量',numresults)
  247. idx=1
  248. found=False
  249. for elmt in elmts:
  250. href=elmt.get_attribute('href')
  251. txt=elmt.text
  252. if len(txt)>10:
  253. if domain in href:
  254. print('found....')
  255. print('clicked....')
  256. print(href)
  257. print(txt)
  258. print("ranking", idx)
  259. table.insert({'ranking':idx,'kw':kw,'results':numresults,'url':href,'title':txt})
  260. found=True
  261. webdriver.ActionChains(driver).move_to_element(elmt).perform()
  262. webdriver.ActionChains(driver).move_to_element(elmt).click().perform()
  263. time.sleep(5)
  264. break
  265. idx+=1
  266. if not found:
  267. table.insert({'ranking':-1,'kw':kw,'results':numresults,'url':'','title':'未收錄'})
  268. except:
  269. print('exception')
  270. traceback.print_exc()
  271. driver.quit()
  272. sys.exit()
  273. r = redis.Redis(host='db.ptt.cx', port=6379, db=2,password='choozmo9')
  274. ##data=r.get('personal_seo')
  275. #jstext=data.decode('utf-8')
  276. #jsobj=json.loads(jstext)
  277. #js=random.choice(jsobj)
  278. js=['seo','台北','新北','新竹','竹北','台灣','最強','集仕多','新聞','是什麼','搜尋','優化','如何','元宇宙','加速','排名','查詢','關鍵字','計劃','曝光','推薦','工具','google','排行','排序','公司','提升','收費','行情','網站','網頁','youtube','計畫','AI','人工智慧','deep learning','深度學習','評分','研究','價格','工具','論壇','自然','規則','流量','建議','寫作','技巧','課程','測試','因素','改善','購買','谷歌','成本','推廣','人員','方式','行銷','外貿','企業','電商','電子商務','商務','改版','分析','老師','講師','顧問','提高','影片','主播']
  279. elmt1=random.choice(js)
  280. elmt2=random.choice(js)
  281. run_once({'domain':'choozmo.com','kw':elmt1+" "+elmt2+" choozmo"})