bypass.py 2.2 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273
  1. """
  2. Example to bypass distil security (https://www.distilnetworks.com/) with Selenium.
  3. They use the javascript field navigator.webdriver to ban Selenium
  4. The solution is to inject javascript code before the loading of the webpage, to set webdriver to false
  5. Works only with chromium driver
  6. """
  7. from datetime import datetime
  8. import os
  9. import sys
  10. from selenium import webdriver
  11. from selenium.webdriver.common.by import By
  12. from selenium.webdriver.support.ui import WebDriverWait, Select
  13. from selenium.webdriver.support import expected_conditions as EC
  14. from selenium.webdriver.common.keys import Keys
  15. from selenium.webdriver.remote.webdriver import WebDriver
  16. import time
  17. import json
  18. def send(driver, cmd, params={}):
  19. """
  20. Send command to chromium driver
  21. """
  22. resource = "/session/%s/chromium/send_command_and_get_result" % driver.session_id
  23. url = driver.command_executor._url + resource
  24. body = json.dumps({'cmd': cmd, 'params': params})
  25. response = driver.command_executor._request('POST', url, body)
  26. # if response['status']:
  27. # raise Exception(response.get('value'))
  28. return response.get('value')
  29. def add_script(driver, script):
  30. """
  31. Inject script before loading page
  32. Cf: https://stackoverflow.com/a/47298910
  33. """
  34. send(driver, "Page.addScriptToEvaluateOnNewDocument", {"source": script})
  35. def process(driver):
  36. driver.add_script('const setProperty = () => { Object.defineProperty(navigator, "webdriver", { get: () => false, }); }; setProperty();')
  37. # load a page
  38. # driver.get('https://www.similarweb.com')
  39. driver.get('about:blank')
  40. time.sleep(20)
  41. def init_webdriver():
  42. """
  43. Init selnium web driver for scraping website
  44. """
  45. WebDriver.add_script = add_script
  46. dir_path = os.path.dirname(os.path.realpath(__file__))
  47. # driver_path = r'%s/lib/chromedriver' % dir_path
  48. options = webdriver.ChromeOptions()
  49. driver = webdriver.Chrome( chrome_options=options)
  50. return driver
  51. if __name__ == '__main__':
  52. driver = None
  53. try:
  54. driver = init_webdriver()
  55. process(driver)
  56. except Exception as e:
  57. # logger.error('Error during process %s' % e)
  58. raise e
  59. finally:
  60. if driver is not None:
  61. driver.close()