priv_seo.py 8.9 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304
  1. #import redis
  2. import time
  3. import traceback
  4. #import json
  5. from selenium import webdriver
  6. from selenium.webdriver.common.desired_capabilities import DesiredCapabilities
  7. import time
  8. import os
  9. from selenium.webdriver.support.ui import WebDriverWait
  10. from selenium.webdriver.common.by import By
  11. from selenium.webdriver.support import expected_conditions as EC
  12. import dataset
  13. from selenium.webdriver.common.keys import Keys
  14. import json
  15. import random
  16. import time
  17. import redis
  18. import sys
  19. import codecs
  20. import random
  21. import os
  22. import time
  23. from userAgentRandomizer import userAgents
  # Shared Selenium session used by every function in this script;
  # (re)created by re_get_webdriver().
  driver = None


  def re_get_webdriver():
      """Tear down any existing Chrome session and start a fresh headless one.

      The new session is stored in the module-level ``driver``.  If Chrome
      itself cannot be launched, the process asks the shell to ``kill`` it and
      then exits.  Any other setup failure prints a traceback and leaves
      ``driver`` as ``None``.

      Returns:
          None in every case; callers inspect the ``driver`` global instead.
      """
      global driver
      if driver is not None:
          print('closing....')
          try:
              driver.quit()
          except Exception:
              # A dead session must not prevent the cleanup and restart below.
              traceback.print_exc()
          # Remove any browser processes that outlived the quit.
          os.system('killall chrome')
          print('quit....')
          driver = None
      try:
          user_agent = userAgents().random()
          options = webdriver.ChromeOptions()
          options.add_argument("--no-sandbox")
          options.add_argument("--disable-dev-shm-usage")
          options.add_argument("--headless")
          # NOTE: the random user agent is only logged; it is not passed to
          # Chrome (the original script had that option commented out).
          print(user_agent)
          options.add_argument("--incognito")
          try:
              driver = webdriver.Chrome(options=options)
          except Exception:
              # Without a browser nothing else can run: terminate the process.
              os.system('kill %d' % os.getpid())
              sys.exit()
          driver.set_window_size(1400, 1000)
      except Exception:
          # ``Exception`` (not a bare except) so the SystemExit raised above
          # propagates instead of being swallowed here.
          traceback.print_exc()
          driver = None
      return None
  def from_shopping(kw):
      """Search for *kw* starting from Google Shopping.

      Opens shopping.google.com, submits *kw* in the input with id ``REsRA``,
      then clicks the tab link whose text contains "全部" or "All"
      (presumably switching to the regular web results; verify against the
      live page).

      Args:
          kw: search phrase typed into the search box.
      """
      driver.get('https://shopping.google.com')
      time.sleep(5)  # fixed pause before looking for the search box

      search_box = driver.find_element(By.XPATH, "//input[@id='REsRA']")
      # Type the requested keyword (previously a hard-coded phrase was sent
      # and ``kw`` was ignored), then press ENTER to submit.
      search_box.send_keys(kw)
      search_box.send_keys(Keys.ENTER)
      time.sleep(7)

      all_tab = driver.find_element(
          By.XPATH,
          "//div[@class='hdtb-mitem']/a[contains(text(),'全部') or contains(text(),'All')]")
      # Hover first, then hover-and-click as a second action chain.
      webdriver.ActionChains(driver).move_to_element(all_tab).perform()
      webdriver.ActionChains(driver).move_to_element(all_tab).click().perform()
      time.sleep(5)
  def from_book(kw):
      """Search for *kw* starting from Google Books.

      Opens books.google.com, submits *kw* in the input with id
      ``oc-search-input``, then clicks the tab link whose text contains
      "全部" or "All" (presumably switching to the regular web results;
      verify against the live page).

      Args:
          kw: search phrase typed into the search box.
      """
      driver.get('https://books.google.com/')
      time.sleep(5)  # fixed pause before looking for the search box

      search_box = driver.find_element(By.XPATH, "//input[@id='oc-search-input']")
      # Type the requested keyword (previously a hard-coded phrase was sent
      # and ``kw`` was ignored), then press ENTER to submit.
      search_box.send_keys(kw)
      search_box.send_keys(Keys.ENTER)
      time.sleep(7)

      all_tab = driver.find_element(
          By.XPATH,
          "//div[@class='hdtb-mitem']/a[contains(text(),'全部') or contains(text(),'All')]")
      # Hover first, then hover-and-click as a second action chain.
      webdriver.ActionChains(driver).move_to_element(all_tab).perform()
      webdriver.ActionChains(driver).move_to_element(all_tab).click().perform()
      time.sleep(5)
  def from_wiki(kw):
      """Search Google for *kw*, arriving through a link on Wikipedia.

      Loads the English Wikipedia article "Google_Search", clicks the first
      anchor whose href contains ``https://www.google.com/``, then types *kw*
      into the input with class ``gLFyf gsfi`` and presses ENTER.

      Args:
          kw: text typed into the search input.
      """
      driver.get('https://en.wikipedia.org/wiki/Google_Search')
      time.sleep(4)

      google_link = driver.find_element(
          By.XPATH, "//a[contains(@href,'https://www.google.com/')]")
      # Two separate action chains: a plain hover, then hover-and-click.
      hover = webdriver.ActionChains(driver).move_to_element(google_link)
      hover.perform()
      hover_click = webdriver.ActionChains(driver).move_to_element(google_link).click()
      hover_click.perform()
      time.sleep(5)

      search_box = driver.find_element(By.XPATH, "//input[@class='gLFyf gsfi']")
      time.sleep(1)
      for keys in (kw, Keys.ENTER):
          search_box.send_keys(keys)
      time.sleep(6)
  def from_bing(kw):
      """Search Google for *kw*, arriving through a Bing result link.

      Loads Bing's results for "google", clicks the first anchor whose href
      contains ``https://www.google.com/``, then types *kw* into the input
      with class ``gLFyf gsfi`` and presses ENTER.

      Args:
          kw: text typed into the search input.
      """
      driver.get('https://www.bing.com/search?q=google')
      time.sleep(4)

      google_link = driver.find_element(
          By.XPATH, "//a[contains(@href,'https://www.google.com/')]")
      # Two separate action chains: a plain hover, then hover-and-click.
      hover = webdriver.ActionChains(driver).move_to_element(google_link)
      hover.perform()
      hover_click = webdriver.ActionChains(driver).move_to_element(google_link).click()
      hover_click.perform()
      time.sleep(5)

      search_box = driver.find_element(By.XPATH, "//input[@class='gLFyf gsfi']")
      time.sleep(1)
      for keys in (kw, Keys.ENTER):
          search_box.send_keys(keys)
      time.sleep(6)
  def from_ecosia(kw):
      """Search Google for *kw*, arriving through an Ecosia result link.

      Loads Ecosia's results for "GOOGLE", clicks the first anchor whose href
      contains ``https://www.google.com/``, then types *kw* into the input
      with class ``gLFyf gsfi`` and presses ENTER.

      Args:
          kw: text typed into the search input.
      """
      driver.get('https://www.ecosia.org/search?method=index&q=GOOGLE')
      time.sleep(4)

      google_link = driver.find_element(
          By.XPATH, "//a[contains(@href,'https://www.google.com/')]")
      # Two separate action chains: a plain hover, then hover-and-click.
      hover = webdriver.ActionChains(driver).move_to_element(google_link)
      hover.perform()
      hover_click = webdriver.ActionChains(driver).move_to_element(google_link).click()
      hover_click.perform()
      time.sleep(5)

      search_box = driver.find_element(By.XPATH, "//input[@class='gLFyf gsfi']")
      time.sleep(1)
      for keys in (kw, Keys.ENTER):
          search_box.send_keys(keys)
      time.sleep(6)
  def from_brave(kw):
      """Search Google for *kw*, arriving through a Brave Search result link.

      Loads Brave Search's results for "google", clicks the first anchor whose
      href contains ``https://www.google.com/``, then types *kw* into the
      input with class ``gLFyf gsfi`` and presses ENTER.

      Args:
          kw: text typed into the search input.
      """
      driver.get('https://search.brave.com/search?q=google&source=web')
      time.sleep(4)

      google_link = driver.find_element(
          By.XPATH, "//a[contains(@href,'https://www.google.com/')]")
      # Two separate action chains: a plain hover, then hover-and-click.
      hover = webdriver.ActionChains(driver).move_to_element(google_link)
      hover.perform()
      hover_click = webdriver.ActionChains(driver).move_to_element(google_link).click()
      hover_click.perform()
      time.sleep(5)

      search_box = driver.find_element(By.XPATH, "//input[@class='gLFyf gsfi']")
      time.sleep(1)
      for keys in (kw, Keys.ENTER):
          search_box.send_keys(keys)
      time.sleep(6)
  def from_duckduckgo(kw):
      """Search Google for *kw*, arriving through a DuckDuckGo result link.

      Loads DuckDuckGo's results for "google", clicks the first anchor whose
      href contains ``https://www.google.com/``, then types *kw* into the
      input with class ``gLFyf gsfi`` and presses ENTER.

      Args:
          kw: text typed into the search input.
      """
      driver.get('https://duckduckgo.com/?q=google')
      time.sleep(4)

      google_link = driver.find_element(
          By.XPATH, "//a[contains(@href,'https://www.google.com/')]")
      # Two separate action chains: a plain hover, then hover-and-click.
      hover = webdriver.ActionChains(driver).move_to_element(google_link)
      hover.perform()
      hover_click = webdriver.ActionChains(driver).move_to_element(google_link).click()
      hover_click.perform()
      time.sleep(5)

      search_box = driver.find_element(By.XPATH, "//input[@class='gLFyf gsfi']")
      time.sleep(1)
      for keys in (kw, Keys.ENTER):
          search_box.send_keys(keys)
      time.sleep(6)
  def from_ekoru(kw):
      """Search Google for *kw*, arriving through an Ekoru result link.

      Loads Ekoru's results for "google", clicks the first anchor whose href
      contains ``https://www.google.com/``, then types *kw* into the input
      with class ``gLFyf gsfi`` and presses ENTER.

      Args:
          kw: text typed into the search input.
      """
      driver.get('https://www.ekoru.org/?q=google')
      time.sleep(4)

      google_link = driver.find_element(
          By.XPATH, "//a[contains(@href,'https://www.google.com/')]")
      # Two separate action chains: a plain hover, then hover-and-click.
      hover = webdriver.ActionChains(driver).move_to_element(google_link)
      hover.perform()
      hover_click = webdriver.ActionChains(driver).move_to_element(google_link).click()
      hover_click.perform()
      time.sleep(5)

      search_box = driver.find_element(By.XPATH, "//input[@class='gLFyf gsfi']")
      time.sleep(1)
      for keys in (kw, Keys.ENTER):
          search_box.send_keys(keys)
      time.sleep(6)
  def from_yahoo(kw):
      """Search Google for *kw*, arriving through a Yahoo result link.

      Loads Yahoo's results for "google", clicks the first anchor whose href
      contains ``https://www.google.com/``, switches to the second window
      handle, prints the current URL, then types *kw* into the input named
      ``q`` and presses ENTER.

      Args:
          kw: text typed into the search input.
      """
      driver.get('https://search.yahoo.com/search?p=google')
      time.sleep(4)

      google_link = driver.find_element(
          By.XPATH, "//a[contains(@href,'https://www.google.com/')]")
      # Two separate action chains: a plain hover, then hover-and-click.
      hover = webdriver.ActionChains(driver).move_to_element(google_link)
      hover.perform()
      hover_click = webdriver.ActionChains(driver).move_to_element(google_link).click()
      hover_click.perform()
      time.sleep(2)

      # Continue in the second window handle (index 1).
      handles = driver.window_handles
      driver.switch_to.window(handles[1])
      time.sleep(3)
      print(driver.current_url)

      search_box = driver.find_element(By.XPATH, "//input[@name='q']")
      time.sleep(1)
      for keys in (kw, Keys.ENTER):
          search_box.send_keys(keys)
      time.sleep(6)
  def from_gibiru(kw):
      """Search Google for *kw*, arriving through a Gibiru result link.

      Loads Gibiru's results for "google", clicks the first anchor whose href
      contains ``https://www.google.com/``, switches to the second window
      handle, prints the current URL, then types *kw* into the input named
      ``q`` and presses ENTER.

      Args:
          kw: text typed into the search input.
      """
      driver.get('https://gibiru.com/results.html?q=google')
      time.sleep(4)

      google_link = driver.find_element(
          By.XPATH, "//a[contains(@href,'https://www.google.com/')]")
      # Two separate action chains: a plain hover, then hover-and-click.
      hover = webdriver.ActionChains(driver).move_to_element(google_link)
      hover.perform()
      hover_click = webdriver.ActionChains(driver).move_to_element(google_link).click()
      hover_click.perform()
      time.sleep(2)

      # Continue in the second window handle (index 1).
      handles = driver.window_handles
      driver.switch_to.window(handles[1])
      time.sleep(3)
      print(driver.current_url)

      search_box = driver.find_element(By.XPATH, "//input[@name='q']")
      time.sleep(1)
      for keys in (kw, Keys.ENTER):
          search_box.send_keys(keys)
      time.sleep(6)
  def run_once(jsobj):
      """Run one keyword search via a random entry route and click the target.

      Picks one of the ten ``from_*`` routes at random, runs it with the job's
      keyword, then scans the result links (``div.yuRUbf > a``) for the first
      one whose text is longer than 10 characters and whose href contains the
      job's domain.  That link is clicked and its 1-based position printed.
      Afterwards the browser is closed and the process exits.

      Args:
          jsobj: mapping with a ``'kw'`` entry (search phrase) and a
              ``'domain'`` entry (substring looked for in result hrefs).

      Returns:
          None, and only when no browser session could be created; otherwise
          the function ends by calling ``sys.exit()``.
      """
      routes = (
          from_book, from_shopping, from_wiki, from_bing, from_duckduckgo,
          from_yahoo, from_gibiru, from_ekoru, from_ecosia, from_brave,
      )
      route = random.choice(routes)

      if driver is None:
          time.sleep(8)
          re_get_webdriver()
      if driver is None:
          return

      try:
          kw = jsobj['kw']
          domain = jsobj['domain']
          route(kw)

          results = driver.find_elements(By.XPATH, "//div[@class='yuRUbf']/a")
          print('搜尋結果數量', len(results))
          for rank, link in enumerate(results, start=1):
              # get_attribute may return None; treat that as "no match".
              href = link.get_attribute('href') or ''
              title = link.text
              if len(title) > 10 and domain in href:
                  print('found....')
                  print('clicked....')
                  print(href)
                  print(title)
                  print("ranking", rank)
                  webdriver.ActionChains(driver).move_to_element(link).perform()
                  webdriver.ActionChains(driver).move_to_element(link).click().perform()
                  time.sleep(5)
                  break
      except Exception:
          # Best effort: log the failure and fall through to the shutdown.
          print('exception')
          traceback.print_exc()
      finally:
          # Always release the browser, even on KeyboardInterrupt/SystemExit.
          driver.quit()
      sys.exit()
  # Script entry: load the job list stored under the Redis key 'personal_seo'
  # and run one randomly chosen job.
  # NOTE(review): host and password are hard-coded here; consider moving them
  # to configuration or environment variables.
  r = redis.Redis(host='db.ptt.cx', port=6379, db=2, password='choozmo9')
  data = r.get('personal_seo')
  if data is None:
      # A missing key yields None; stop with a clear message instead of an
      # AttributeError on .decode().
      sys.exit("redis key 'personal_seo' not found; nothing to run")
  jstext = data.decode('utf-8')
  jsobj = json.loads(jstext)
  if not jsobj:
      # random.choice() raises IndexError on an empty sequence.
      sys.exit("redis key 'personal_seo' holds no jobs; nothing to run")
  js = random.choice(jsobj)
  run_once(js)