"""
Example to bypass Distil security (https://www.distilnetworks.com/) with Selenium.

They use the JavaScript field navigator.webdriver to ban Selenium.
The solution is to inject JavaScript code before the loading of the webpage,
to set webdriver to false.

Works only with the chromium driver.
"""
from datetime import datetime
import json
import os
import sys
import time

from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.remote.webdriver import WebDriver
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait, Select

# JavaScript evaluated in every new document before the page's own scripts:
# it redefines navigator.webdriver so that reading it yields false.
_HIDE_WEBDRIVER_JS = (
    'const setProperty = () => {'
    '     Object.defineProperty(navigator, "webdriver", {'
    '       get: () => false,'
    '     }); }; setProperty();'
)


def send(driver, cmd, params=None):
    """
    Send a command to the chromium driver and return its result.

    :param driver: Selenium WebDriver with an open chromium session.
    :param cmd: name of the command to run,
        e.g. "Page.addScriptToEvaluateOnNewDocument".
    :param params: dict of parameters for the command; an empty dict is
        sent when omitted.
    :return: the 'value' entry of the driver's response (None if absent).
    """
    # A fresh dict per call avoids sharing one mutable default between calls.
    if params is None:
        params = {}

    resource = "/session/%s/chromium/send_command_and_get_result" % driver.session_id
    # NOTE: _url and _request are private attributes of Selenium's remote
    # connection and may differ between Selenium releases.
    url = driver.command_executor._url + resource
    body = json.dumps({'cmd': cmd, 'params': params})
    response = driver.command_executor._request('POST', url, body)
    # The response status is not inspected; callers only get the 'value' field.
    return response.get('value')


def add_script(driver, script):
    """
    Inject a script that is evaluated before each page is loaded.

    Cf: https://stackoverflow.com/a/47298910

    :param driver: Selenium WebDriver with an open chromium session.
    :param script: JavaScript source code to inject.
    """
    send(driver, "Page.addScriptToEvaluateOnNewDocument", {"source": script})


def process(driver, url='about:blank', wait_seconds=20):
    """
    Register the navigator.webdriver override, load a page and wait.

    :param driver: WebDriver returned by init_webdriver() (it must expose
        the add_script method that init_webdriver() installs).
    :param url: page to load; defaults to 'about:blank'. Pass a real site,
        e.g. 'https://www.similarweb.com', to try the bypass against it.
    :param wait_seconds: how long to keep the page open before returning.
    """
    driver.add_script(_HIDE_WEBDRIVER_JS)

    # Load a page, then keep the browser open long enough to inspect it.
    driver.get(url)
    time.sleep(wait_seconds)


def init_webdriver():
    """
    Init the Selenium web driver for scraping websites.

    Also attaches add_script to the WebDriver class so that it can be called
    as driver.add_script(script).

    :return: a new Chrome WebDriver instance.
    """
    WebDriver.add_script = add_script

    # chromedriver is looked up by Selenium's default mechanism; to use a
    # bundled binary instead, pass its path explicitly, e.g. a
    # 'lib/chromedriver' file located next to this script.
    options = webdriver.ChromeOptions()
    # 'options' replaces the deprecated 'chrome_options' keyword.
    return webdriver.Chrome(options=options)


def main():
    """Run the example and always shut the browser down afterwards."""
    driver = None
    try:
        driver = init_webdriver()
        process(driver)
    finally:
        if driver is not None:
            # quit() ends the whole session (browser and driver process),
            # whereas close() only closes the current window.
            driver.quit()


if __name__ == '__main__':
    main()