"""Thin helpers around a Selenium-driven Chrome instance.

Provides a CDP ``send`` helper, a script-injection helper, and the
``JBrowser`` wrapper class that configures and owns a Chrome driver.
"""

import json
import os
import re
import sys
import time

from selenium import webdriver
# from seleniumwire import webdriver  # alternative driver; see NOTE in set_profile_path
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.remote.webdriver import WebDriver
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait, Select


def interceptor(request):
    """Replace the ``Referer`` header of *request* with a Google URL.

    The existing header is deleted first and then re-assigned, so the
    request ends up with exactly one ``Referer`` value.
    """
    del request.headers['Referer']  # Remember to delete the header first
    request.headers['Referer'] = 'https://www.google.com/'  # Spoof the referer


def send(driver, cmd, params=None):
    """Send a command to the chromium driver and return its result.

    Args:
        driver: Selenium driver exposing ``session_id`` and
            ``command_executor``.
        cmd: Name of the command to execute.
        params: Optional mapping of command parameters; defaults to an
            empty mapping.

    Returns:
        The ``value`` entry of the driver's response.

    Raises:
        Exception: If the response carries a truthy ``status``; the
            exception argument is the response ``value``.
    """
    # A fresh dict per call avoids sharing one mutable default between calls.
    if params is None:
        params = {}
    resource = "/session/%s/chromium/send_command_and_get_result" % driver.session_id
    url = driver.command_executor._url + resource
    body = json.dumps({'cmd': cmd, 'params': params})
    response = driver.command_executor._request('POST', url, body)
    if response['status']:
        raise Exception(response.get('value'))
    return response.get('value')


def add_script(driver, script):
    """Inject *script* so it is evaluated before each new document loads.

    Cf: https://stackoverflow.com/a/47298910
    """
    send(driver, "Page.addScriptToEvaluateOnNewDocument", {"source": script})


class JBrowser:
    """Wrapper that configures and owns a Chrome WebDriver.

    ``set_profile_path`` must be called before ``scrolling``, ``get`` or
    ``get_driver``; it is the only place ``self.driver`` is assigned.
    """

    def __init__(self):
        """On Windows, change the working directory to Chrome's install dir."""
        if os.name == 'nt':
            os.chdir("C:\\Program Files\\Google\\Chrome\\Application\\")

    def set_profile_path(self, profilepath):
        """Build the Chrome options, start the driver and store both.

        Args:
            profilepath: Profile location used for ``--user-data-dir``, or
                ``None`` to start without a user-data directory. On Windows
                it is appended to a hard-coded ``User Data`` directory; on
                other systems it is used as given.
        """
        self.profilepath = profilepath
        option = webdriver.ChromeOptions()
        option.add_argument('--disable-web-security')
        option.add_argument('--allow-running-insecure-content')
        # option.add_argument('--headless')
        # option.add_argument("window-size=1280,800")
        # option.add_argument("user-agent=Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/74.0.3729.169 Safari/537.36")
        option.add_argument('--disable-blink-features=AutomationControlled')
        if profilepath is not None:
            if os.name == 'nt':
                # The Windows user-data root is hard-coded to one user account.
                option.add_argument("--user-data-dir=C:\\Users\\jared\\AppData\\Local\\Google\\Chrome\\User Data\\" + self.profilepath + "\\")
            else:
                option.add_argument("--user-data-dir=" + self.profilepath)
                # NOTE(review): the original file's indentation was lost; this
                # argument is assumed to belong to the non-Windows branch.
                # Confirm against the intended behaviour.
                option.add_argument('--profile-directory="Profile 1"')
        option.add_experimental_option("excludeSwitches", ["enable-automation"])
        option.add_experimental_option('useAutomationExtension', False)
        self.option = option

        # Expose add_script as a method on every WebDriver instance.
        WebDriver.add_script = add_script

        driver = webdriver.Chrome(options=option)
        driver.execute_cdp_cmd('Network.setUserAgentOverride', {"userAgent": 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/83.0.4103.53 Safari/537.36'})
        driver.execute_script("Object.defineProperty(navigator, 'webdriver', {get: () => undefined})")
        # NOTE(review): request_interceptor looks like the selenium-wire hook;
        # with the plain selenium driver imported above this presumably only
        # sets an attribute. Confirm which driver is intended.
        driver.request_interceptor = interceptor
        self.driver = driver
        driver.delete_all_cookies()

    def scrolling(self, pgnum):
        """Send PAGE_DOWN to the page body *pgnum* times.

        When *pgnum* is greater than 1, sleeps 0.5 s after each key press.
        """
        # find_element(By...) replaces find_element_by_css_selector, which
        # Selenium 4 removed.
        body = self.driver.find_element(By.CSS_SELECTOR, 'body')
        for _ in range(pgnum):
            body.send_keys(Keys.PAGE_DOWN)
            # NOTE(review): assumed to sit inside the loop (indentation was
            # lost in the original file).
            if pgnum > 1:
                time.sleep(0.5)

    def get(self, url):
        """Navigate the owned driver to *url*."""
        self.driver.get(url)

    def get_driver(self):
        """Return the underlying driver created by ``set_profile_path``."""
        return self.driver