# gen_seo.py
  1. #import redis
  2. import time
  3. import traceback
  4. #import json
  5. from selenium import webdriver
  6. from selenium.webdriver.common.desired_capabilities import DesiredCapabilities
  7. import time
  8. import os
  9. from selenium.webdriver.support.ui import WebDriverWait
  10. from selenium.webdriver.common.by import By
  11. from selenium.webdriver.support import expected_conditions as EC
  12. import dataset
  13. from selenium.webdriver.common.keys import Keys
  14. import json
  15. import random
  16. import time
  17. import redis
  18. import sys
  19. import codecs
  20. import random
  21. import os
  22. import time
  23. from userAgentRandomizer import userAgents
  24. import requests
  25. driver=None
  26. db = dataset.connect('mysql://choozmo:pAssw0rd@db.ptt.cx:3306/seo?charset=utf8mb4')
  27. headers = {
  28. "Authorization": "Bearer " + "6SDULL1Ebklduc6TFxa97AFto5Sj21kyJ30CxiLiSoi",
  29. "Content-Type": "application/x-www-form-urlencoded"
  30. }
  31. def send_msg(kw):
  32. params = {"message":kw}
  33. r = requests.post("https://notify-api.line.me/api/notify",headers=headers, params=params)
  34. def re_get_webdriver():
  35. global driver
  36. result=[]
  37. if driver is not None:
  38. print('closing....')
  39. driver.quit()
  40. os.system('killall chrome')
  41. print('quit....')
  42. driver=None
  43. try:
  44. ua = userAgents()
  45. user_agent = ua.random()
  46. options = webdriver.ChromeOptions()
  47. options.add_argument("--no-sandbox")
  48. options.add_argument("--disable-dev-shm-usage")
  49. options.add_argument("--headless")
  50. print(user_agent)
  51. # options.add_argument("--user-agent=" +user_agent)
  52. options.add_argument("--incognito")
  53. driver=None
  54. try:
  55. driver = webdriver.Chrome(options=options)
  56. except:
  57. # driver.quit()
  58. # os.system('pkill -f ')
  59. os.system('kill %d' % os.getpid())
  60. sys.exit()
  61. return
  62. driver.set_window_size(1400,1000)
  63. return
  64. except:
  65. import traceback
  66. traceback.print_exc()
  67. driver=None
  68. return None
  69. def from_shopping(kw):
  70. global driver
  71. driver.get('https://shopping.google.com')
  72. time.sleep(5)
  73. elmt = driver.find_element(By.XPATH, "//input[@id='REsRA']")
  74. elmt.send_keys('幸福空間')
  75. elmt.send_keys(Keys.ENTER) #hits space
  76. time.sleep(7)
  77. elmt = driver.find_element(By.XPATH, "//div[@class='hdtb-mitem']/a[contains(text(),'全部') or contains(text(),'All')]")
  78. webdriver.ActionChains(driver).move_to_element(elmt).perform()
  79. webdriver.ActionChains(driver).move_to_element(elmt).click().perform()
  80. time.sleep(5)
  81. def from_book(kw):
  82. global driver
  83. driver.get('https://books.google.com/')
  84. time.sleep(5)
  85. elmt = driver.find_element(By.XPATH, "//input[@id='oc-search-input']")
  86. elmt.send_keys('幸福空間')
  87. elmt.send_keys(Keys.ENTER) #hits space
  88. time.sleep(7)
  89. elmt = driver.find_element(By.XPATH, "//div[@class='hdtb-mitem']/a[contains(text(),'全部') or contains(text(),'All')]")
  90. webdriver.ActionChains(driver).move_to_element(elmt).perform()
  91. webdriver.ActionChains(driver).move_to_element(elmt).click().perform()
  92. time.sleep(5)
  93. def from_wiki(kw):
  94. global driver
  95. driver.get('https://en.wikipedia.org/wiki/Google_Search')
  96. time.sleep(4)
  97. elmt = driver.find_element(By.XPATH, "//a[contains(@href,'https://www.google.com/')]")
  98. webdriver.ActionChains(driver).move_to_element(elmt).perform()
  99. webdriver.ActionChains(driver).move_to_element(elmt).click().perform()
  100. time.sleep(5)
  101. elmt = driver.find_element(By.XPATH, "//input[@class='gLFyf gsfi']")
  102. time.sleep(1)
  103. elmt.send_keys(kw)
  104. elmt.send_keys(Keys.ENTER)
  105. time.sleep(6)
  106. def from_bing(kw):
  107. global driver
  108. driver.get('https://www.bing.com/search?q=google')
  109. time.sleep(4)
  110. elmt = driver.find_element(By.XPATH, "//a[contains(@href,'https://www.google.com/')]")
  111. webdriver.ActionChains(driver).move_to_element(elmt).perform()
  112. webdriver.ActionChains(driver).move_to_element(elmt).click().perform()
  113. time.sleep(5)
  114. elmt = driver.find_element(By.XPATH, "//input[@class='gLFyf gsfi']")
  115. time.sleep(1)
  116. elmt.send_keys(kw)
  117. elmt.send_keys(Keys.ENTER)
  118. time.sleep(6)
  119. def from_ecosia(kw):
  120. global driver
  121. driver.get('https://www.ecosia.org/search?method=index&q=GOOGLE')
  122. time.sleep(4)
  123. elmt = driver.find_element(By.XPATH, "//a[contains(@href,'https://www.google.com/')]")
  124. webdriver.ActionChains(driver).move_to_element(elmt).perform()
  125. webdriver.ActionChains(driver).move_to_element(elmt).click().perform()
  126. time.sleep(5)
  127. elmt = driver.find_element(By.XPATH, "//input[@class='gLFyf gsfi']")
  128. time.sleep(1)
  129. elmt.send_keys(kw)
  130. elmt.send_keys(Keys.ENTER)
  131. time.sleep(6)
  132. def from_brave(kw):
  133. global driver
  134. driver.get('https://search.brave.com/search?q=google&source=web')
  135. time.sleep(4)
  136. elmt = driver.find_element(By.XPATH, "//a[contains(@href,'https://www.google.com/')]")
  137. webdriver.ActionChains(driver).move_to_element(elmt).perform()
  138. webdriver.ActionChains(driver).move_to_element(elmt).click().perform()
  139. time.sleep(5)
  140. elmt = driver.find_element(By.XPATH, "//input[@class='gLFyf gsfi']")
  141. time.sleep(1)
  142. elmt.send_keys(kw)
  143. elmt.send_keys(Keys.ENTER)
  144. time.sleep(6)
  145. def from_duckduckgo(kw):
  146. global driver
  147. driver.get('https://duckduckgo.com/?q=google')
  148. time.sleep(4)
  149. elmt = driver.find_element(By.XPATH, "//a[contains(@href,'https://www.google.com/')]")
  150. webdriver.ActionChains(driver).move_to_element(elmt).perform()
  151. webdriver.ActionChains(driver).move_to_element(elmt).click().perform()
  152. time.sleep(5)
  153. # time.sleep(9999)
  154. elmt = driver.find_element(By.XPATH, "//input[@class='gLFyf gsfi']")
  155. time.sleep(1)
  156. elmt.send_keys(kw)
  157. elmt.send_keys(Keys.ENTER)
  158. time.sleep(6)
  159. def from_ekoru(kw):
  160. global driver
  161. driver.get('https://www.ekoru.org/?q=google')
  162. time.sleep(4)
  163. elmt = driver.find_element(By.XPATH, "//a[contains(@href,'https://www.google.com/')]")
  164. webdriver.ActionChains(driver).move_to_element(elmt).perform()
  165. webdriver.ActionChains(driver).move_to_element(elmt).click().perform()
  166. time.sleep(5)
  167. # time.sleep(9999)
  168. elmt = driver.find_element(By.XPATH, "//input[@class='gLFyf gsfi']")
  169. time.sleep(1)
  170. elmt.send_keys(kw)
  171. elmt.send_keys(Keys.ENTER)
  172. time.sleep(6)
  173. def from_yahoo(kw):
  174. global driver
  175. driver.get('https://search.yahoo.com/search?p=google')
  176. time.sleep(4)
  177. elmt = driver.find_element(By.XPATH, "//a[contains(@href,'https://www.google.com/')]")
  178. webdriver.ActionChains(driver).move_to_element(elmt).perform()
  179. webdriver.ActionChains(driver).move_to_element(elmt).click().perform()
  180. time.sleep(2)
  181. driver.switch_to.window(driver.window_handles[1])
  182. time.sleep(3)
  183. print(driver.current_url)
  184. elmt = driver.find_element(By.XPATH, "//input[@name='q']")
  185. time.sleep(1)
  186. elmt.send_keys(kw)
  187. elmt.send_keys(Keys.ENTER)
  188. time.sleep(6)
  189. def from_gibiru(kw):
  190. global driver
  191. driver.get('https://gibiru.com/results.html?q=google')
  192. time.sleep(4)
  193. elmt = driver.find_element(By.XPATH, "//a[contains(@href,'https://www.google.com/')]")
  194. webdriver.ActionChains(driver).move_to_element(elmt).perform()
  195. webdriver.ActionChains(driver).move_to_element(elmt).click().perform()
  196. time.sleep(2)
  197. driver.switch_to.window(driver.window_handles[1])
  198. time.sleep(3)
  199. print(driver.current_url)
  200. elmt = driver.find_element(By.XPATH, "//input[@name='q']")
  201. time.sleep(1)
  202. elmt.send_keys(kw)
  203. elmt.send_keys(Keys.ENTER)
  204. time.sleep(6)
  205. def run_once(jsobj):
  206. table=db['rank_detection']
  207. print(jsobj)
  208. global driver
  209. # i=random.randint(0,9)
  210. i=100
  211. if driver is None:
  212. time.sleep(8)
  213. re_get_webdriver()
  214. if driver is None:
  215. return
  216. try:
  217. kw=jsobj['kw']
  218. domain=jsobj['domain']
  219. # kw='leo 娛樂城 484'
  220. # kw='leo 娛樂城 3011'
  221. # domain='tha484.com'
  222. # domain='tha3011.com'
  223. if i==0:
  224. from_book(kw)
  225. elif i==1:
  226. from_shopping(kw)
  227. elif i==2:
  228. from_wiki(kw)
  229. elif i==3:
  230. from_bing(kw)
  231. elif i==4:
  232. from_duckduckgo(kw)
  233. elif i==5:
  234. from_yahoo(kw)
  235. elif i==6:
  236. from_gibiru(kw)
  237. elif i==7:
  238. from_ekoru(kw)
  239. elif i==8:
  240. from_ecosia(kw)
  241. elif i==9:
  242. from_brave(kw)
  243. # time.sleep(9999)
  244. driver.get('https://www.google.com?num=100')
  245. time.sleep(3)
  246. print(driver.current_url)
  247. elmt = driver.find_element(By.XPATH, "//input[@name='q']")
  248. time.sleep(1)
  249. elmt.send_keys(kw)
  250. elmt.send_keys(Keys.ENTER)
  251. time.sleep(6)
  252. elmts = driver.find_elements(By.XPATH, "//div[@class='yuRUbf']/a")
  253. numresults=len(elmts)
  254. print('搜尋結果數量',numresults)
  255. if numresults==0:
  256. send_msg('stop working...')
  257. sys.exit()
  258. idx=1
  259. found=False
  260. for elmt in elmts:
  261. href=elmt.get_attribute('href')
  262. txt=elmt.text
  263. if len(txt)>10:
  264. if domain in href:
  265. print('found....')
  266. print('clicked....')
  267. print(href)
  268. print(txt)
  269. print("ranking", idx)
  270. table.insert({'ranking':idx,'kw':kw,'results':numresults,'url':href,'title':txt})
  271. found=True
  272. webdriver.ActionChains(driver).move_to_element(elmt).perform()
  273. webdriver.ActionChains(driver).move_to_element(elmt).click().perform()
  274. time.sleep(5)
  275. break
  276. idx+=1
  277. if not found:
  278. table.insert({'ranking':-1,'kw':kw,'results':numresults,'url':'','title':'未收錄'})
  279. except:
  280. print('exception')
  281. traceback.print_exc()
  282. driver.quit()
  283. sys.exit()
  284. r = redis.Redis(host='db.ptt.cx', port=6379, db=2,password='choozmo9')
  285. ##data=r.get('personal_seo')
  286. #jstext=data.decode('utf-8')
  287. #jsobj=json.loads(jstext)
  288. #js=random.choice(jsobj)
  289. #js=['seo','台北','新北','新竹','竹北','台灣','最強','集仕多','新聞','是什麼','搜尋','優化','如何','元宇宙','加速','排名','查詢','關鍵字','計劃','曝光','推薦','工具','google','排行','排序','公司','提升','收費','行情','網站','網頁','youtube','計畫','AI','人工智慧','deep learning','深度學習','評分','研究','價格','工具','論壇','自然','規則','流量','建議','寫作','技巧','課程','測試','因素','改善','購買','谷歌','成本','推廣','人員','方式','行銷','外貿','企業','電商','電子商務','商務','改版','分析','老師','講師','顧問','提高','影片','主播','廣告','投放','5g','元宇宙','ppt','mp4','podcast']
  290. js=['seo','台北','新北','新竹','竹北','台灣','最強','choozmo','新聞','是什麼','搜尋','優化','如何','元宇宙','加速','排名','查詢','關鍵字','計劃','曝光','推薦','工具','google','排行','排序','公司','提升','收費','行情','網站','網頁','youtube','計畫','AI','人工智慧','deep learning','深度學習','評分','研究','價格','工具','論壇','自然','規則','流量','建議','寫作','技巧','課程','測試','因素','改善','購買','谷歌','成本','推廣','人員','方式','行銷','外貿','企業','電商','電子商務','商務','改版','分析','老師','講師','顧問','提高','影片','主播','廣告','投放','5g','元宇宙','ppt','mp4','podcast','pptx']
  291. elmt1=random.choice(js)
  292. elmt2=random.choice(js)
  293. #run_once({'domain':'choozmo.com','kw':elmt1+" "+elmt2+" choozmo"})
  294. #run_once({'domain':'choozmo.com','kw':elmt1+" "+elmt2+" 集仕多"})
  295. run_once({'domain':'choozmo.com','kw':elmt1+" "+elmt2})
  296. #run_once({'domain':'choozmo.com','kw':elmt1+" 集仕多"})
  297. #run_once({'domain':'choozmo.com','kw':"企業 研發 委外"})
  298. #run_once({'domain':'choozmo.com','kw':"企業 系統 研發"})
  299. #run_once({'domain':'choozmo.com','kw':"企業 研發 方案 委外"})
  300. #run_once({'domain':'choozmo.com','kw':"集仕多 委外"})