gen_seo.py 13 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371
  1. #import redis
  2. import time
  3. import traceback
  4. #import json
  5. from selenium import webdriver
  6. from selenium.webdriver.common.desired_capabilities import DesiredCapabilities
  7. import time
  8. import os
  9. from selenium.webdriver.support.ui import WebDriverWait
  10. from selenium.webdriver.common.by import By
  11. from selenium.webdriver.support import expected_conditions as EC
  12. import dataset
  13. from selenium.webdriver.common.keys import Keys
  14. import json
  15. import random
  16. import time
  17. import redis
  18. import sys
  19. import codecs
  20. import random
  21. import os
  22. import time
  23. from userAgentRandomizer import userAgents
  24. import requests
  25. driver=None
  26. db = dataset.connect('mysql://choozmo:pAssw0rd@db.ptt.cx:3306/seo?charset=utf8mb4')
  27. headers = {
  28. "Authorization": "Bearer " + "6SDULL1Ebklduc6TFxa97AFto5Sj21kyJ30CxiLiSoi",
  29. "Content-Type": "application/x-www-form-urlencoded"
  30. }
  31. def send_msg(kw):
  32. params = {"message":kw}
  33. r = requests.post("https://notify-api.line.me/api/notify",headers=headers, params=params)
  34. def re_get_webdriver():
  35. global driver
  36. result=[]
  37. if driver is not None:
  38. print('closing....')
  39. driver.quit()
  40. os.system('killall chrome')
  41. print('quit....')
  42. driver=None
  43. try:
  44. ua = userAgents()
  45. user_agent = ua.random()
  46. options = webdriver.ChromeOptions()
  47. options.add_argument("--no-sandbox")
  48. options.add_argument("--disable-dev-shm-usage")
  49. options.add_argument("--headless")
  50. options.add_argument('--remote-debugging-port=9922')
  51. # options.add_experimental_option("debuggerAddress", '127.0.0.1:9922')
  52. print(user_agent)
  53. # options.add_argument("--user-agent=" +user_agent)
  54. options.add_argument("--incognito")
  55. driver=None
  56. try:
  57. driver = webdriver.Chrome(options=options)
  58. except:
  59. # driver.quit()
  60. # os.system('pkill -f ')
  61. # os.system('kill %d' % os.getpid())
  62. traceback.print_exc()
  63. sys.exit()
  64. return
  65. driver.set_window_size(1400,1000)
  66. return
  67. except:
  68. traceback.print_exc()
  69. driver=None
  70. return None
  71. def from_shopping(kw):
  72. global driver
  73. driver.get('https://shopping.google.com')
  74. time.sleep(5)
  75. elmt = driver.find_element(By.XPATH, "//input[@id='REsRA']")
  76. elmt.send_keys('幸福空間')
  77. elmt.send_keys(Keys.ENTER) #hits space
  78. time.sleep(7)
  79. elmt = driver.find_element(By.XPATH, "//div[@class='hdtb-mitem']/a[contains(text(),'全部') or contains(text(),'All')]")
  80. webdriver.ActionChains(driver).move_to_element(elmt).perform()
  81. webdriver.ActionChains(driver).move_to_element(elmt).click().perform()
  82. time.sleep(5)
  83. def from_book(kw):
  84. global driver
  85. driver.get('https://books.google.com/')
  86. time.sleep(5)
  87. elmt = driver.find_element(By.XPATH, "//input[@id='oc-search-input']")
  88. elmt.send_keys('幸福空間')
  89. elmt.send_keys(Keys.ENTER) #hits space
  90. time.sleep(7)
  91. elmt = driver.find_element(By.XPATH, "//div[@class='hdtb-mitem']/a[contains(text(),'全部') or contains(text(),'All')]")
  92. webdriver.ActionChains(driver).move_to_element(elmt).perform()
  93. webdriver.ActionChains(driver).move_to_element(elmt).click().perform()
  94. time.sleep(5)
  95. def from_wiki(kw):
  96. global driver
  97. driver.get('https://en.wikipedia.org/wiki/Google_Search')
  98. time.sleep(4)
  99. elmt = driver.find_element(By.XPATH, "//a[contains(@href,'https://www.google.com/')]")
  100. webdriver.ActionChains(driver).move_to_element(elmt).perform()
  101. webdriver.ActionChains(driver).move_to_element(elmt).click().perform()
  102. time.sleep(5)
  103. elmt = driver.find_element(By.XPATH, "//input[@class='gLFyf gsfi']")
  104. time.sleep(1)
  105. elmt.send_keys(kw)
  106. elmt.send_keys(Keys.ENTER)
  107. time.sleep(6)
  108. def from_bing(kw):
  109. global driver
  110. driver.get('https://www.bing.com/search?q=google')
  111. time.sleep(4)
  112. elmt = driver.find_element(By.XPATH, "//a[contains(@href,'https://www.google.com/')]")
  113. webdriver.ActionChains(driver).move_to_element(elmt).perform()
  114. webdriver.ActionChains(driver).move_to_element(elmt).click().perform()
  115. time.sleep(5)
  116. elmt = driver.find_element(By.XPATH, "//input[@class='gLFyf gsfi']")
  117. time.sleep(1)
  118. elmt.send_keys(kw)
  119. elmt.send_keys(Keys.ENTER)
  120. time.sleep(6)
  121. def from_ecosia(kw):
  122. global driver
  123. driver.get('https://www.ecosia.org/search?method=index&q=GOOGLE')
  124. time.sleep(4)
  125. elmt = driver.find_element(By.XPATH, "//a[contains(@href,'https://www.google.com/')]")
  126. webdriver.ActionChains(driver).move_to_element(elmt).perform()
  127. webdriver.ActionChains(driver).move_to_element(elmt).click().perform()
  128. time.sleep(5)
  129. elmt = driver.find_element(By.XPATH, "//input[@class='gLFyf gsfi']")
  130. time.sleep(1)
  131. elmt.send_keys(kw)
  132. elmt.send_keys(Keys.ENTER)
  133. time.sleep(6)
  134. def from_brave(kw):
  135. global driver
  136. driver.get('https://search.brave.com/search?q=google&source=web')
  137. time.sleep(4)
  138. elmt = driver.find_element(By.XPATH, "//a[contains(@href,'https://www.google.com/')]")
  139. webdriver.ActionChains(driver).move_to_element(elmt).perform()
  140. webdriver.ActionChains(driver).move_to_element(elmt).click().perform()
  141. time.sleep(5)
  142. elmt = driver.find_element(By.XPATH, "//input[@class='gLFyf gsfi']")
  143. time.sleep(1)
  144. elmt.send_keys(kw)
  145. elmt.send_keys(Keys.ENTER)
  146. time.sleep(6)
  147. def from_duckduckgo(kw):
  148. global driver
  149. driver.get('https://duckduckgo.com/?q=google')
  150. time.sleep(4)
  151. elmt = driver.find_element(By.XPATH, "//a[contains(@href,'https://www.google.com/')]")
  152. webdriver.ActionChains(driver).move_to_element(elmt).perform()
  153. webdriver.ActionChains(driver).move_to_element(elmt).click().perform()
  154. time.sleep(5)
  155. # time.sleep(9999)
  156. elmt = driver.find_element(By.XPATH, "//input[@class='gLFyf gsfi']")
  157. time.sleep(1)
  158. elmt.send_keys(kw)
  159. elmt.send_keys(Keys.ENTER)
  160. time.sleep(6)
  161. def from_ekoru(kw):
  162. global driver
  163. driver.get('https://www.ekoru.org/?q=google')
  164. time.sleep(4)
  165. elmt = driver.find_element(By.XPATH, "//a[contains(@href,'https://www.google.com/')]")
  166. webdriver.ActionChains(driver).move_to_element(elmt).perform()
  167. webdriver.ActionChains(driver).move_to_element(elmt).click().perform()
  168. time.sleep(5)
  169. # time.sleep(9999)
  170. elmt = driver.find_element(By.XPATH, "//input[@class='gLFyf gsfi']")
  171. time.sleep(1)
  172. elmt.send_keys(kw)
  173. elmt.send_keys(Keys.ENTER)
  174. time.sleep(6)
  175. def from_yahoo(kw):
  176. global driver
  177. driver.get('https://search.yahoo.com/search?p=google')
  178. time.sleep(4)
  179. elmt = driver.find_element(By.XPATH, "//a[contains(@href,'https://www.google.com/')]")
  180. webdriver.ActionChains(driver).move_to_element(elmt).perform()
  181. webdriver.ActionChains(driver).move_to_element(elmt).click().perform()
  182. time.sleep(2)
  183. driver.switch_to.window(driver.window_handles[1])
  184. time.sleep(3)
  185. print(driver.current_url)
  186. elmt = driver.find_element(By.XPATH, "//input[@name='q']")
  187. time.sleep(1)
  188. elmt.send_keys(kw)
  189. elmt.send_keys(Keys.ENTER)
  190. time.sleep(6)
  191. def from_gibiru(kw):
  192. global driver
  193. driver.get('https://gibiru.com/results.html?q=google')
  194. time.sleep(4)
  195. elmt = driver.find_element(By.XPATH, "//a[contains(@href,'https://www.google.com/')]")
  196. webdriver.ActionChains(driver).move_to_element(elmt).perform()
  197. webdriver.ActionChains(driver).move_to_element(elmt).click().perform()
  198. time.sleep(2)
  199. driver.switch_to.window(driver.window_handles[1])
  200. time.sleep(3)
  201. print(driver.current_url)
  202. elmt = driver.find_element(By.XPATH, "//input[@name='q']")
  203. time.sleep(1)
  204. elmt.send_keys(kw)
  205. elmt.send_keys(Keys.ENTER)
  206. time.sleep(6)
  207. def run_once(jsobj):
  208. table=db['rank_detection']
  209. print(jsobj)
  210. global driver
  211. # i=random.randint(0,9)
  212. i=100
  213. if driver is None:
  214. time.sleep(8)
  215. re_get_webdriver()
  216. if driver is None:
  217. return
  218. try:
  219. kw=jsobj['kw']
  220. if jsobj.get('domain') is None:
  221. exclude=jsobj['exclude']
  222. domain=None
  223. else:
  224. domain=jsobj['domain']
  225. exclude=None
  226. if i==0:
  227. from_book(kw)
  228. elif i==1:
  229. from_shopping(kw)
  230. elif i==2:
  231. from_wiki(kw)
  232. elif i==3:
  233. from_bing(kw)
  234. elif i==4:
  235. from_duckduckgo(kw)
  236. elif i==5:
  237. from_yahoo(kw)
  238. elif i==6:
  239. from_gibiru(kw)
  240. elif i==7:
  241. from_ekoru(kw)
  242. elif i==8:
  243. from_ecosia(kw)
  244. elif i==9:
  245. from_brave(kw)
  246. # time.sleep(9999)
  247. # driver.get('https://www.google.com?num=100')
  248. driver.get('https://www.google.com?num=20')
  249. time.sleep(3)
  250. print(driver.current_url)
  251. elmt = driver.find_element(By.XPATH, "//input[@name='q']")
  252. time.sleep(1)
  253. elmt.send_keys(kw)
  254. elmt.send_keys(Keys.ENTER)
  255. time.sleep(6)
  256. elmts = driver.find_elements(By.XPATH, "//div[@class='yuRUbf']/a")
  257. numresults=len(elmts)
  258. print('搜尋結果數量',numresults)
  259. if numresults==0:
  260. send_msg('stop working...')
  261. sys.exit()
  262. idx=1
  263. found=False
  264. test_lst=[]
  265. for elmt in elmts:
  266. href=elmt.get_attribute('href')
  267. txt=elmt.text
  268. if len(txt)>10:
  269. if domain is not None:
  270. if domain in href:
  271. print('found....')
  272. print('clicked....')
  273. print(href)
  274. print(txt)
  275. print("ranking", idx)
  276. table.insert({'ranking':idx,'kw':kw,'results':numresults,'url':href,'title':txt})
  277. found=True
  278. webdriver.ActionChains(driver).move_to_element(elmt).perform()
  279. webdriver.ActionChains(driver).move_to_element(elmt).click().perform()
  280. time.sleep(5)
  281. break
  282. else:
  283. if exclude not in href:
  284. test_lst.append(elmt)
  285. idx+=1
  286. if exclude is not None:
  287. test_lst=test_lst[9:]
  288. print('exclude')
  289. elmt=random.choice(test_lst)
  290. print(elmt)
  291. webdriver.ActionChains(driver).move_to_element(elmt).perform()
  292. webdriver.ActionChains(driver).move_to_element(elmt).click().perform()
  293. time.sleep(5)
  294. if not found:
  295. table.insert({'ranking':-1,'kw':kw,'results':numresults,'url':'','title':'未收錄'})
  296. except:
  297. print('exception')
  298. traceback.print_exc()
  299. driver.quit()
  300. sys.exit()
  301. os.system('docker container restart tiny1')
  302. time.sleep(6)
  303. r = redis.Redis(host='db.ptt.cx', port=6379, db=2,password='choozmo9')
  304. ##data=r.get('personal_seo')
  305. #jstext=data.decode('utf-8')
  306. #jsobj=json.loads(jstext)
  307. #js=random.choice(jsobj)
  308. #js=['seo','台北','新北','新竹','竹北','台灣','最強','集仕多','新聞','是什麼','搜尋','優化','如何','元宇宙','加速','排名','查詢','關鍵字','計劃','曝光','推薦','工具','google','排行','排序','公司','提升','收費','行情','網站','網頁','youtube','計畫','AI','人工智慧','deep learning','深度學習','評分','研究','價格','工具','論壇','自然','規則','流量','建議','寫作','技巧','課程','測試','因素','改善','購買','谷歌','成本','推廣','人員','方式','行銷','外貿','企業','電商','電子商務','商務','改版','分析','老師','講師','顧問','提高','影片','主播','廣告','投放','5g','元宇宙','ppt','mp4','podcast']
  309. js=['seo','台北','新北','新竹','竹北','台灣','最強','choozmo','新聞','是什麼','搜尋','優化','如何','元宇宙','加速','排名','查詢','關鍵字','計劃','曝光','推薦','工具','google','排行','排序','公司','提升','收費','行情','網站','網頁','youtube','計畫','AI','人工智慧','deep learning','深度學習','評分','研究','價格','工具','論壇','自然','規則','流量','建議','寫作','技巧','課程','測試','因素','改善','購買','谷歌','成本','推廣','人員','方式','行銷','外貿','企業','電商','電子商務','商務','改版','分析','老師','講師','顧問','提高','影片','主播','廣告','投放','5g','元宇宙','ppt','mp4','podcast','pptx']
  310. hhh=['幸福空間','幸福 空間','室內設計 幸福','室內設計 幸福空間','裝潢 幸福空間','幸福空間']
  311. elmt1=random.choice(js)
  312. elmt2=random.choice(js)
  313. hhh1=random.choice(hhh)
  314. op=random.randint(0,5)
  315. #run_once({'domain':'hhh.com.tw','kw':'班尼斯'})
  316. run_once({'domain':'choozmo.com','kw':elmt1+" "+elmt2+" choozmo"})