etoday_use.py 8.4 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287
  1. #import redis
  2. import time
  3. import traceback
  4. #import json
  5. from selenium import webdriver
  6. from selenium.webdriver.common.desired_capabilities import DesiredCapabilities
  7. import time
  8. import urllib
  9. import os
  10. from selenium.webdriver.support.ui import WebDriverWait
  11. from selenium.webdriver.common.by import By
  12. from selenium.webdriver.support import expected_conditions as EC
  13. import dataset
  14. from selenium.webdriver.common.keys import Keys
  15. import json
  16. import random
  17. import time
  18. import redis
  19. import sys
  20. import codecs
  21. import random
  22. import os
  23. import time
  24. import requests
  25. import datetime
  26. driver=None
  27. from fake_useragent import UserAgent
  28. ua = UserAgent()
  29. #proxy_enabled=True
  30. # proxy_enabled=False
  31. # # https://youtu.be/cR2M5Khgxvc
  32. # db = dataset.connect('mysql://choozmo:pAssw0rd@db.ptt.cx:3306/seo?charset=utf8mb4')
  33. # glog_table=db['general_log']
  34. def re_get_webdriver():
  35. # global port
  36. global driver
  37. global portnum
  38. # os.system('killall chrome')
  39. result=[]
  40. # if driver is not None:
  41. # print('closing....')
  42. # driver.quit()
  43. # print('quit....')
  44. # driver=None
  45. # os.system()
  46. options = webdriver.ChromeOptions()
  47. options.add_argument("--user-agent=" +ua.random)
  48. options.add_argument("--no-sandbox")
  49. options.add_argument("--headless")
  50. options.add_argument("--incognito")
  51. driver = webdriver.Remote(
  52. command_executor='http://127.0.0.1:'+str(portnum)+'/wd/hub',
  53. options=options)
  54. return driver
  55. # try:
  56. # options = webdriver.ChromeOptions()
  57. # options.add_argument("--no-sandbox")
  58. # options.add_argument("--headless")
  59. # options.add_argument("--incognito")
  60. # # if proxy_enabled:
  61. # # options.add_argument('--proxy-server=socks5://172.104.92.245:14900')
  62. # try:
  63. # driver = webdriver.Remote(
  64. # command_executor='http://127.0.0.1:'+str(portnum)+'/wd/hub',
  65. # options=options)
  66. # except:
  67. # traceback.print_exc()
  68. # return None
  69. # return driver
  70. # except:
  71. # traceback.print_exc()
  72. # driver=None
  73. # return None
  74. # return driver
  75. def run_once():
  76. global count
  77. global portnum
  78. global bok
  79. # global glog_table
  80. # table=db['nda_log']
  81. # print(jsobj)
  82. # kw=jsobj['kw']
  83. # options = webdriver.ChromeOptions()
  84. # options.add_argument("--no-sandbox")
  85. # options.add_argument("--headless")
  86. # options.add_argument("--incognito")
  87. # driver = webdriver.Remote(
  88. # command_executor='http://127.0.0.1:'+str(portnum)+'/wd/hub',
  89. # options=options)
  90. # if driver is not None:
  91. # break
  92. ettoday_url_list = ['https://house.ettoday.net/news/1492047',
  93. 'https://house.ettoday.net/news/1492167',
  94. 'https://house.ettoday.net/news/1492288',
  95. 'https://house.ettoday.net/news/1492178',
  96. 'https://house.ettoday.net/news/1492229',
  97. 'https://house.ettoday.net/news/1492134',
  98. 'https://house.ettoday.net/news/1492240',
  99. 'https://house.ettoday.net/news/1492161',
  100. 'https://house.ettoday.net/news/1492168',
  101. 'https://house.ettoday.net/news/1492217']
  102. # try:
  103. for i in ettoday_url_list:
  104. try:
  105. driver=re_get_webdriver()
  106. except:
  107. portnum=random.randint(4555,4666)
  108. print(portnum)
  109. os.system('docker container stop p8816')
  110. time.sleep(5)
  111. os.system('docker container rm p8816')
  112. time.sleep(5)
  113. os.system('docker run -d -p '+str(portnum)+':4444 --name p8816 --dns 168.95.1.1 selenium/standalone-chrome:101.0')
  114. bok += 1
  115. count=0
  116. time.sleep(5)
  117. driver=re_get_webdriver()
  118. time.sleep(3)
  119. try:
  120. driver.get(i)
  121. time.sleep(3)
  122. elmt_next = driver.find_element(By.XPATH, '//*[@id="house"]/div[3]/div[2]/div[6]/div/div/div[1]/article/div/div[3]/p[1]/a')
  123. webdriver.ActionChains(driver).move_to_element(elmt_next).perform()
  124. time.sleep(3)
  125. webdriver.ActionChains(driver).move_to_element(elmt_next).click().perform()
  126. print("cick!",i)
  127. count+=1
  128. print("count_time:",count,';borken_time:',bok)
  129. # elmt = driver.find_element(By.XPATH, '//*[@id="yschsp"]')
  130. time.sleep(random.randint(3,7))
  131. driver.quit()
  132. except:
  133. driver.quit()
  134. print("wrong",i,';borken_time:',bok)
  135. time.sleep(5)
  136. # except:
  137. # print('wrong for:',i)
  138. # kw=jsobj['kw']
  139. # if jsobj.get('domain') is None:
  140. # exclude=jsobj['exclude']
  141. # domain=None
  142. # else:
  143. # domain=jsobj['domain']
  144. # exclude=None
  145. # driver.get('https://www.google.com?num=100')
  146. # time.sleep(17)
  147. # while True:
  148. # try:
  149. # print(driver.current_url)
  150. # break
  151. # except:
  152. # traceback.print_exc()
  153. # driver=re_get_webdriver()
  154. # time.sleep(3)
  155. # driver.get('https://www.google.com?num=100')
  156. # time.sleep(3)
  157. # time.sleep(3)
  158. # elmt = driver.find_element(By.XPATH, "//input[@name='q']")
  159. # time.sleep(1)
  160. # elmt.send_keys(kw)
  161. # elmt.send_keys(Keys.ENTER)
  162. # time.sleep(6)
  163. # elmts = driver.find_elements(By.XPATH, "//div[@class='yuRUbf']/a")
  164. # numresults=len(elmts)
  165. # # time.sleep(9999)
  166. # print('搜尋結果數量',numresults)
  167. # if numresults==0:
  168. # print(driver.current_url)
  169. # print(driver.title)
  170. # sys.exit()
  171. # idx=1
  172. # found=False
  173. # test_lst=[]
  174. # for elmt in elmts:
  175. # href=elmt.get_attribute('href')
  176. # txt=elmt.text
  177. # if len(txt)>10:
  178. # if domain is not None:
  179. # for d in domain:
  180. # if d in href:
  181. # print('found....')
  182. # print('clicked....')
  183. # print(href)
  184. # print(txt)
  185. # print("ranking", idx)
  186. # found=True
  187. # webdriver.ActionChains(driver).move_to_element(elmt).perform()
  188. # # elmt.click()
  189. # webdriver.ActionChains(driver).move_to_element(elmt).click().perform()
  190. # table.insert({'kw':kw,'ranking':idx,'title':txt,'url':href,'dt':datetime.datetime.now(),'result':numresults,'client':'64G'})
  191. # time.sleep(6)
  192. # return
  193. # else:
  194. # if exclude not in href:
  195. # test_lst.append(elmt)
  196. # idx+=1
  197. # if exclude is not None:
  198. # print('exclude')
  199. # elmt=random.choice(test_lst)
  200. # print(elmt)
  201. # webdriver.ActionChains(driver).move_to_element(elmt).perform()
  202. # webdriver.ActionChains(driver).move_to_element(elmt).click().perform()
  203. # time.sleep(5)
  204. # if not found:
  205. # table.insert({'ranking':-1,'kw':kw,'results':numresults,'url':'','title':'未收錄'})
  206. # except:
  207. # traceback.print_exc()
  208. # print('exception')
  209. # traceback.print_exc()
  210. # time.sleep(5)
  211. # r=random.randint(0,27)
  212. # r=26
  213. # cursor=db.query('select json from seo_jobs where cust="KNIGHT" and plan="形象SEO" order by rand() limit 1')
  214. # for c in cursor:
  215. # js=json.loads(c['json'])
  216. # prefix=js['prefix']
  217. # postfix=js['postfix']
  218. # domain=js['domain'][0]
  219. # positive=js['positive']
  220. # rnd=js['rnd']
  221. portnum=random.randint(4555,4666)
  222. print(portnum)
  223. os.system('docker container stop p8816')
  224. time.sleep(5)
  225. os.system('docker container rm p8816')
  226. time.sleep(5)
  227. os.system('docker run -d -p '+str(portnum)+':4444 --name p8816 --dns 168.95.1.1 selenium/standalone-chrome:101.0')
  228. bok = 0
  229. count=0
  230. time.sleep(5)
  231. while True:
  232. # run_once()
  233. # time.sleep(10)
  234. run_once()
  235. # kw=random.choice(positive)
  236. # kw2=random.choice(rnd)
  237. # count=0
  238. # while True:
  239. # try:
  240. # run_once({'domain':domain,'kw':prefix+" "+kw+" "+kw2})
  241. # count+=1
  242. # except:
  243. # continue
  244. # print('中場休息 次數',count)
  245. # time.sleep(random.randint(120,150))