# test_random.py 8.1 KB
import ast
import codecs
import json
import os
import random
import sys
import time
import traceback

import dataset
import requests
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.common.desired_capabilities import DesiredCapabilities
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait

# Disabled imports kept from the original listing:
# import redis
# import urllib
  25. driver=None
  26. dockername='p4444'
  27. is_docker=True
  28. #is_docker=False
  29. db = dataset.connect('postgresql://postgres:eyJhbGciOiJI@172.105.241.163:5432/postgres')
  30. #db = dataset.connect('mysql://choozmo:pAssw0rd@db.ptt.cx:3306/seo?charset=utf8mb4')
  31. if is_docker:
  32. portnum=random.randint(4444,4555)
  33. print(portnum)
  34. os.system('docker container stop '+dockername)
  35. time.sleep(0.5)
  36. os.system('docker container rm '+dockername)
  37. time.sleep(0.5)
  38. os.system('docker run -d -p '+str(portnum)+':4444 --shm-size=2g --name '+dockername+' --dns 168.95.1.1 selenium/standalone-chrome:103.0')
  39. time.sleep(7)
  40. def re_get_webdriver():
  41. global port
  42. global driver
  43. global portnum
  44. global is_docker
  45. result=[]
  46. if driver is not None:
  47. print('closing....')
  48. driver.quit()
  49. print('quit....')
  50. driver=None
  51. try:
  52. options = webdriver.ChromeOptions()
  53. options.add_argument("--no-sandbox")
  54. options.add_argument("--headless")
  55. options.add_argument("--incognito")
  56. # options.add_argument('--proxy-server=socks5://172.104.92.245:14900')
  57. mobile_emulation = {
  58. "deviceMetrics": { "width": 360, "height": 640, "pixelRatio": 3.0 },
  59. "userAgent": "Mozilla/5.0 (Linux; Android 4.2.1; en-us; Nexus 5 Build/JOP40D) AppleWebKit/535.19 (KHTML, like Gecko) Chrome/18.0.1025.166 Mobile Safari/535.19" }
  60. # options.add_experimental_option("mobileEmulation", mobile_emulation)
  61. if is_docker:
  62. try:
  63. driver = webdriver.Remote(
  64. command_executor='http://127.0.0.1:'+str(portnum)+'/wd/hub',
  65. options=options)
  66. except:
  67. traceback.print_exc()
  68. time.sleep(9999)
  69. return None
  70. return driver
  71. try:
  72. driver = webdriver.Chrome(options=options)
  73. except:
  74. traceback.print_exc()
  75. return None
  76. return driver
  77. except:
  78. traceback.print_exc()
  79. driver=None
  80. return None
  81. return driver
  82. def run_once(jsobj):
  83. # table=db['seo_jobs_ranking']
  84. # table=db['seo_test_jobs_ranking']
  85. table=db['seo_jobs']
  86. # history=db['seo_search_history']
  87. print(jsobj)
  88. kw=jsobj['kw']
  89. i=100
  90. while True:
  91. driver=re_get_webdriver()
  92. print('re_get_webdriver')
  93. if driver is not None:
  94. break
  95. time.sleep(3)
  96. try:
  97. kw=jsobj['kw']
  98. if jsobj.get('domain') is None:
  99. exclude=jsobj['exclude']
  100. domain=None
  101. else:
  102. domain=jsobj['domain']
  103. exclude=None
  104. driver.get('https://www.google.com?num=100')
  105. time.sleep(1)
  106. while True:
  107. try:
  108. print(driver.current_url)
  109. break
  110. except:
  111. traceback.print_exc()
  112. driver=re_get_webdriver()
  113. time.sleep(3)
  114. driver.get('https://www.google.com?num=100')
  115. # time.sleep(3)
  116. time.sleep(3)
  117. # elmt = driver.find_element(By.XPATH, "//input[@name='q']")
  118. elmt = driver.find_element(By.XPATH, "//textarea[@type='search']")
  119. time.sleep(1)
  120. elmt.send_keys(kw)
  121. elmt.send_keys(Keys.ENTER)
  122. time.sleep(3)
  123. # elmts = driver.find_elements(By.XPATH, "//div[@class='yuRUbf']/a")
  124. elmts = driver.find_elements(By.XPATH, "//a[@jsname='UWckNb']")
  125. numresults=len(elmts)
  126. print('搜尋結果數量',numresults)
  127. if numresults==0:
  128. print(driver.current_url)
  129. print(driver.title)
  130. sys.exit()
  131. # time.sleep(9999)
  132. idx=1
  133. found=False
  134. test_lst=[]
  135. clickelmt=None
  136. neg_count=0
  137. neg_total=0
  138. clickidx=0
  139. clickhref=''
  140. clicktitle=''
  141. for elmt in elmts:
  142. href=elmt.get_attribute('href')
  143. txt=elmt.text
  144. # history.insert({'ranking':idx,'kw':kw,'results':numresults,'url':href,'title':txt})
  145. if '坑殺' in txt or '侵占' in txt or '判決書' in txt or '強佔' in txt or '掏空' in txt or '送達公告' in txt or '違反勞動'in txt or '不倫' in txt or '開房' in txt or '摩鐵' in txt:
  146. neg_count+=1
  147. neg_total+=idx
  148. if len(txt)>10:
  149. if domain is not None:
  150. random.shuffle(domain)
  151. for d in domain:
  152. if d in href:
  153. print('found....')
  154. print('clicked....')
  155. print(href)
  156. print(txt)
  157. print("ranking", idx)
  158. found=True
  159. clickelmt=elmt
  160. clickidx=idx
  161. clickhref=href
  162. clicktitle=txt
  163. else:
  164. if exclude not in href:
  165. test_lst.append(elmt)
  166. idx+=1
  167. if exclude is not None:
  168. print('exclude')
  169. elmt=random.choice(test_lst)
  170. print(elmt)
  171. webdriver.ActionChains(driver).move_to_element(elmt).perform()
  172. webdriver.ActionChains(driver).move_to_element(elmt).click().perform()
  173. time.sleep(5)
  174. if neg_count ==0:
  175. negstr='0'
  176. else:
  177. negstr=str(neg_total/neg_count)
  178. print(' negative: ' +negstr)
  179. if neg_total > 0:
  180. print('negative.....')
  181. if not found:
  182. True
  183. print('not found')
  184. # table.insert({'ranking':-1,'kw':kw,'results':numresults,'url':'','title':'未收錄'})
  185. else:
  186. webdriver.ActionChains(driver).move_to_element(clickelmt).perform()
  187. webdriver.ActionChains(driver).move_to_element(clickelmt).click().perform()
  188. print('clicked...')
  189. entry={'cust':jsobj['cust'],'plan':jsobj['plan'],'prefix':'','postfix':'','domain':str(domain),'kw':kw,'positive':str([''])}
  190. if numresults >=40:
  191. print(entry)
  192. table.insert(entry)
  193. # table.insert({'ranking':clickidx,'kw':kw,'results':numresults,'url':clickhref,'title':clicktitle,'avg_neg':negstr})
  194. time.sleep(6)
  195. print('sleep 6')
  196. return
  197. except:
  198. traceback.print_exc()
  199. print('exception')
  200. traceback.print_exc()
  201. driver.quit()
  202. time.sleep(5)
  203. #r=random.randint(0,7)
  204. r=987
  205. #JNOTE: 關鍵字點擊
  206. related=''
  207. cursor=db.query('SELECT id,cust,plan,prefix,domain,kw,positive FROM public.seo_random_test_jobs order by random() limit 1')
  208. for c in cursor:
  209. id=c['id']
  210. cust=c['cust']
  211. kw=c['kw']
  212. plan=c['plan']
  213. prefix=c['prefix']
  214. domain=eval(c['domain'])
  215. positive=eval(c['positive'])
  216. break
  217. print('delete from seo_random_test_jobs where id='+str(c['id'])+'')
  218. db.query('delete from seo_random_test_jobs where id='+str(c['id'])+'')
  219. # positive=['台北室內設計公司排名']
  220. # positive=[related]
  221. # positive=['半 日照 植物 推薦']
  222. # positive=['3 坪 多大']
  223. # positive=['鞋櫃']
  224. # positive=['裝修屋子']
  225. # positive=['']
  226. # kw='幸福空間'
  227. # kw='輕裝修'
  228. # kw='輕裝修'
  229. #朱英凱
  230. #琢隱設計
  231. #os.system('curl --socks5 choozmo:choozmo9@172.104.92.245:14900 http://www.google.com')
  232. newkw=prefix+" "+kw+' '+random.choice(positive)
  233. print(newkw)
  234. #newkw=kw
  235. run_once({'domain':domain,'kw':newkw,'id':id,'cust':cust,'plan':plan,'positive':positive})