fast_9222.py 4.4 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175
  1. #import redis
  2. import time
  3. import traceback
  4. #import json
  5. from selenium import webdriver
  6. from selenium.webdriver.common.desired_capabilities import DesiredCapabilities
  7. import time
  8. import os
  9. import urllib.parse
  10. from selenium.webdriver.support.ui import WebDriverWait
  11. from selenium.webdriver.common.by import By
  12. from selenium.webdriver.support import expected_conditions as EC
  13. import dataset
  14. import codecs
  15. import random
  16. import requests
  17. import time
  18. import sys
  19. #import docker
  20. import codecs
  21. import random
  22. import os
  23. import time
  # Shared Selenium WebDriver handle; None until re_get_webdriver() stores one.
  driver = None

  # HTTP headers sent with the LINE Notify request made by send_msg().
  # The token can be supplied through the LINE_NOTIFY_TOKEN environment
  # variable; the literal below is only the backward-compatible fallback.
  # NOTE(review): the fallback is a credential committed to source control --
  # rotate it and rely on the environment variable instead.
  headers = {
      "Authorization": "Bearer " + os.environ.get(
          "LINE_NOTIFY_TOKEN", "t35vhZtWNgvDNWHc3DJh0OKll3mcB9GvC8K2EAkBug2"
      ),
      "Content-Type": "application/x-www-form-urlencoded",
  }
  def send_msg(kw):
      """Post a LINE Notify message announcing the keyword being processed.

      Args:
          kw: Keyword text appended to the fixed "處理關鍵字: " prefix.

      Raises:
          requests.RequestException: If the request fails or the timeout
              expires.
      """
      params = {"message": "處理關鍵字: " + kw}
      # requests waits indefinitely unless a timeout is given; bound the call
      # so a stalled endpoint cannot hang the caller.
      requests.post(
          "https://notify-api.line.me/api/notify",
          headers=headers,
          params=params,
          timeout=10,
      )
  def empty_query(q):
      """Load the Google results page for *q* in the shared driver, then pause.

      Args:
          q: Search phrase; it is percent-encoded before being placed in the
              URL.
      """
      encoded_phrase = urllib.parse.quote(q)
      driver.get('https://www.google.com/search?q=' + encoded_phrase)
      # Fixed three-second wait after navigation.
      time.sleep(3)
  def process_query(qs):
      """Search Google for a phrase and click the first result on a given domain.

      Args:
          qs: Sequence read by position: ``qs[0]`` is the search phrase,
              ``qs[1]`` the number of times to follow the "next page" link,
              and ``qs[2]`` the substring looked for in each result URL.
      """
      query, page_clicks, domain = qs[0], qs[1], qs[2]

      driver.get('https://www.google.com/search?q=' + urllib.parse.quote(query))
      time.sleep(3)

      # Follow the "next page" link the requested number of times; range() is
      # already empty for counts <= 0, so no separate guard is needed.
      for _ in range(page_clicks):
          # find_element(s)_by_xpath were removed in Selenium 4.3; the
          # By-based calls work on both older and current releases.
          next_link = driver.find_element(By.XPATH, "//a[@id='pnnext']")
          webdriver.ActionChains(driver).move_to_element(next_link).perform()
          webdriver.ActionChains(driver).move_to_element(next_link).click().perform()
          time.sleep(3)

      result_links = driver.find_elements(
          By.XPATH, "//div[@class='g']//div[@class='yuRUbf']//a"
      )
      for link in result_links:
          href = link.get_attribute('href')
          # Skip anchors with no href (get_attribute returns None, which would
          # make the substring test raise) and anchors whose visible text is
          # 10 characters or shorter.
          if href is None or len(link.text) <= 10:
              continue
          if domain in href:
              # Hover first, then click; only the first match is clicked.
              webdriver.ActionChains(driver).move_to_element(link).perform()
              webdriver.ActionChains(driver).move_to_element(link).click().perform()
              break
  def re_get_webdriver():
      """Replace the shared driver with a fresh headless Chrome session.

      Any existing session is quit first. On success the new driver is stored
      in the module-level ``driver`` and returned. On failure the traceback is
      printed, ``driver`` is set to None and None is returned.

      Returns:
          The new ``webdriver.Chrome`` instance, or None if start-up failed.
      """
      global driver

      if driver is not None:
          print('closing....')
          try:
              driver.quit()
          except Exception:
              # quit() can raise on a crashed or disconnected session; that
              # must not prevent a replacement from being started.
              traceback.print_exc()
          driver = None

      new_driver = None
      try:
          chrome_options = webdriver.ChromeOptions()
          chrome_options.add_argument('--ignore-certificate-errors')
          chrome_options.add_argument("--no-sandbox")
          chrome_options.add_argument("--headless")
          chrome_options.add_argument("--disable-dev-shm-usage")
          new_driver = webdriver.Chrome(options=chrome_options)
          new_driver.set_window_size(1400, 1000)
      except Exception:
          # Catch Exception rather than everything so Ctrl-C still stops the
          # script.
          traceback.print_exc()
          if new_driver is not None:
              # The browser started but set-up failed afterwards: shut it down
              # so the process is not left running.
              try:
                  new_driver.quit()
              except Exception:
                  traceback.print_exc()
          driver = None
          return None

      driver = new_driver
      return driver
  # The shared driver handle is (re)initialised to "no session" here, after the
  # helper definitions.
  driver = None
  def run_once(url):
      """Open *url* in a new browser tab, then run a 400px scroll script.

      When the random draw below comes up 0, or when no driver exists yet, the
      driver is first rebuilt through re_get_webdriver(). If there is still no
      driver afterwards the call returns without doing anything. Errors raised
      while running the scripts are reported on stdout, not propagated.

      Args:
          url: Address handed to ``window.open``.
      """
      roll = random.randint(0, 22)
      if roll == 0 or driver is None:
          time.sleep(6)
          re_get_webdriver()
          time.sleep(1)
      if driver is None:
          return

      try:
          # Pass the URL as a script argument instead of splicing it into the
          # JavaScript source, so quotes or backslashes in it cannot break or
          # alter the script.
          driver.execute_script('window.open(arguments[0], "_blank");', url)
          # NOTE(review): focus is never switched to the new tab, so this
          # presumably scrolls the original window -- confirm that is intended.
          driver.execute_script("window.scrollTo(0, window.scrollY + 400)")
          time.sleep(0.3)
      except Exception:
          # Exception (not a bare except) so Ctrl-C can still end the caller's
          # endless loop; the traceback shows what actually went wrong.
          print('exception')
          traceback.print_exc()
  # Script body: restart the Docker container named p9222, read the column ids
  # from the database, then open randomly chosen article URLs forever.
  fpath = __file__
  os.system('docker restart p9222')
  print(fpath)

  # The connection URL can be supplied through the SEO_DB_URL environment
  # variable; the literal is only the backward-compatible fallback.
  # NOTE(review): the fallback embeds database credentials in source control --
  # rotate them and rely on the environment variable instead.
  db = dataset.connect(os.environ.get(
      'SEO_DB_URL',
      'mysql://choozmo:pAssw0rd@db.ptt.cx:3306/seo?charset=utf8mb4',
  ))
  cursor = db.query('SELECT * FROM columnids order by rand()')
  lst = [
      'https://www.hhh.com.tw/columns/detail/' + str(row['cid']) + '/index.php'
      for row in cursor
  ]

  if not lst:
      # random.choice() raises IndexError on an empty list; stop with a clear
      # message instead.
      raise SystemExit('columnids returned no rows; nothing to visit')

  while True:
      target = random.choice(lst)
      print(target)
      run_once(target)