test_github.py 7.6 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204
  1. import os
  2. import sys
  3. from datetime import datetime
  4. from time import sleep
  5. from selenium import webdriver
  6. from selenium.common.exceptions import TimeoutException
  7. from selenium.webdriver.chrome.options import Options
  8. from selenium.webdriver.common.by import By
  9. from selenium.webdriver.common.desired_capabilities import DesiredCapabilities
  10. from selenium.webdriver.common.keys import Keys
  11. from selenium.webdriver.support import expected_conditions as EC
  12. from selenium.webdriver.support.ui import WebDriverWait
  13. from threading import Thread
  14. from queue import Queue
  15. import argparse
# chrome_driver_executable_path = ''
# if getattr(sys, 'frozen', False):
#     # publish one file
#     chrome_driver_executable_path = sys._MEIPASS + r'\resource\chromedriver.exe'
# else:
#     # source
#     chrome_driver_executable_path = os.getcwd() + r'\resource\chromedriver.exe'
  23. capa = DesiredCapabilities.CHROME
  24. capa["pageLoadStrategy"] = "none"
  25. capa['loggingPrefs'] = {'driver': 'OFF', 'server': 'OFF', 'browser': 'OFF'}
  26. attempt = 0
  27. successful_attempt = 0
  28. final_sleep = 0
  29. timeout_ex = 0
  30. other_ex = 0
  31. recaptcha_ex = 0
  32. def main_script(keyword, site_url, max_successful_clicks, browser_visibility_flag):
  33. global attempt
  34. global successful_attempt
  35. global final_sleep
  36. global timeout_ex
  37. global other_ex
  38. global recaptcha_ex
  39. while successful_attempt < max_successful_clicks:
  40. options = Options()
  41. if not browser_visibility_flag:
  42. options.add_argument('--headless')
  43. options.add_argument("--js-flags=--expose-gc")
  44. options.add_argument("--enable-precise-memory-info")
  45. options.add_argument("--disable-popup-blocking")
  46. options.add_argument("--disable-default-apps")
  47. options.add_argument("disable-infobars")
  48. options.add_argument('--disable-contextual-search')
  49. options.add_argument("--disable-notifications")
  50. options.add_argument('--incognito')
  51. options.add_argument('--disable-application-cache')
  52. options.add_argument('--no-sandbox')
  53. options.add_argument('--disk-cache-size=0')
  54. options.add_argument('–-disable-restore-session-state')
  55. options.add_argument('--disable-extensions')
  56. options.add_argument('test-type')
  57. options.add_argument('--silent')
  58. options.add_argument('--log-level=3')
  59. options.add_argument("--proxy-server=socks5://127.0.0.1:9050")
  60. driver = None
  61. try:
  62. driver = webdriver.Chrome(
  63. # executable_path=chrome_driver_executable_path,
  64. chrome_options=options,
  65. desired_capabilities=capa)
  66. wait = WebDriverWait(driver, 40)
  67. driver.implicitly_wait(10)
  68. driver.set_page_load_timeout(80)
  69. # driver.get('http://google.com/search?q=' + urllib.parse.urlencode(keyword)) # does not work
  70. driver.get('http://google.com/')
  71. wait.until(EC.presence_of_element_located((By.CSS_SELECTOR, 'input[name="q"]')))
  72. driver.execute_script("window.stop();")
  73. driver.execute_script("window.stop();")
  74. attempt += 1
  75. elm = driver.find_element_by_css_selector('input[name="q"]')
  76. elm.send_keys(keyword)
  77. elm.send_keys(Keys.RETURN)
  78. sleep(3)
  79. recaptcha = None
  80. try:
  81. recaptcha = driver.find_elements_by_css_selector('#recaptcha')
  82. except:
  83. pass
  84. if not recaptcha:
  85. wait.until(EC.presence_of_element_located((By.CSS_SELECTOR, '#res')))
  86. driver.execute_script("window.stop();")
  87. site_links = driver.find_elements_by_css_selector('#res a')
  88. site_links_filtered = filter(lambda x: site_url in x.get_attribute('href'), site_links)
  89. site_link = next(site_links_filtered)
  90. site_link.click()
  91. wait.until(EC.presence_of_element_located((By.CSS_SELECTOR, 'body')))
  92. sleep(10)
  93. elm = driver.find_element_by_css_selector('body')
  94. elm.send_keys(Keys.RETURN)
  95. successful_attempt += 1
  96. final_sleep = 240 # staying in the website
  97. else:
  98. driver.execute_script("window.stop();")
  99. recaptcha_ex += 1
  100. final_sleep = 0
  101. except TimeoutException as ex:
  102. final_sleep = 0
  103. timeout_ex += 1
  104. except Exception as ex:
  105. final_sleep = 0
  106. other_ex += 1
  107. else:
  108. info_message = '{}\t|\tprocess pid: {: <6}\t|\t{: >5} attempt\t|\t{: >5} successful click\t|\t{: >5} timeout\t|\t {: >5} recaptcha \t|\t{: >5} error'.format(
  109. datetime.now().strftime('%Y/%m/%d %H:%M:%S'), os.getpid(), attempt,
  110. successful_attempt,
  111. timeout_ex,
  112. recaptcha_ex,
  113. other_ex
  114. )
  115. print(info_message)
  116. f = open('log.txt', 'a')
  117. f.write(info_message + '\n')
  118. f.close()
  119. finally:
  120. sleep(final_sleep)
  121. if driver:
  122. driver.close()
  123. del driver
  124. print()
  125. print('process pid: {} | finished {} successful clicks! \nprocess stopped...'.format(os.getpid(),
  126. max_successful_clicks))
  127. f = open('log.txt', 'a')
  128. f.write('process pid: {} | finished {} successful clicks! \nprocess stopped...'.format(os.getpid(),
  129. max_successful_clicks) + '\n')
  130. f.close()
  131. print()
  132. class WorkerThread(Thread):
  133. def __init__(self, q, keyword, site_url, max_successful_clicks, browser_visibility_flag):
  134. super().__init__()
  135. self.setDaemon(True)
  136. self.q = q
  137. self.keyword = keyword
  138. self.site_url = site_url
  139. self.max_successful_clicks = max_successful_clicks
  140. self.browser_visibility_flag = browser_visibility_flag
  141. def run(self):
  142. while True:
  143. print()
  144. print('automated searching started...')
  145. print()
  146. main_script(self.keyword, self.site_url, self.max_successful_clicks, self.browser_visibility_flag)
  147. self.q.task_done()
# parser = argparse.ArgumentParser(prog='python automate_search_google.py',
#                                  description='A script to automatically search keywords on google and click on your desired website link as much as you want.')
# parser.add_argument('k', help='list of keywords to search in format of "[\'keyword 1\',\'keyword 2\']"')
# parser.add_argument('u', help='Url of your desired website to be clicked without http and www. eg: example.com')
# parser.add_argument('m', type=int, help='maximum successful click count you want. eg: 500')
# parser.add_argument('-v', '--visible', action='store_true',
#                     help='browser visibility flag. if present the browser become visible')
# args = parser.parse_args()
# keywords = eval(args.k)
  157. keywords=['幸福空間']
  158. message = ' please start tor browser first '.upper()
  159. print()
  160. message_str = '#' * 10 + message + '#' * 10
  161. print('#' * len(message_str))
  162. print('#' * len(message_str))
  163. print('#' * 10 + ' ' * len(message) + '#' * 10)
  164. print(message_str)
  165. print('#' * 10 + ' ' * len(message) + '#' * 10)
  166. print('#' * len(message_str))
  167. print('#' * len(message_str))
  168. print()
  169. my_queue = Queue()
  170. for key in keywords:
  171. worker = WorkerThread(my_queue, key, 'hhh.com.tw', 1, 1)
  172. worker.start()
  173. for i in keywords:
  174. my_queue.put(i)
  175. sleep(1)
  176. my_queue.join()
  177. c = input('Press Enter to exit program... ')
  178. exit(0)