"""Scrape contact e-mail / phone text from LinkedIn contact-info overlays.

The script selects every row of ``linkedin_list`` whose ``email`` is NULL,
opens that profile's contact-info overlay in a local Chrome session, and
writes the e-mail and phone text it finds (or empty strings) back to the row.
"""
import codecs
import datetime
import os
import random
import sys
import time
import traceback
import urllib.parse

import dataset
import docker
import googlesearch
import requests
import rpyc
from bs4 import BeautifulSoup
from selenium import webdriver
from selenium.common.exceptions import NoSuchElementException
from selenium.webdriver.common.by import By
from selenium.webdriver.common.desired_capabilities import DesiredCapabilities
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait


def scrolling(driver, pgnum):
    """Send PAGE_DOWN to the page body ``pgnum`` times.

    Args:
        driver: Selenium WebDriver whose current page is scrolled.
        pgnum: Number of PAGE_DOWN key presses to send.
    """
    body = driver.find_element(By.CSS_SELECTOR, 'body')
    for _ in range(pgnum):
        body.send_keys(Keys.PAGE_DOWN)
        # NOTE(review): the original file's indentation was lost; the pause is
        # assumed to sit inside the loop (one pause per key press) -- confirm.
        if pgnum > 1:
            time.sleep(0.3)


def process_query(driver, c):
    """Open one profile's contact-info overlay and read e-mail and phone.

    Args:
        driver: Selenium WebDriver used to load the page.
        c: Row mapping with an ``'href'`` key holding the profile slug that is
            inserted into the ``https://www.linkedin.com/in/<slug>/...`` URL.

    Returns:
        dict with keys ``'email'`` and ``'phone'``; each value is the text of
        the matching element, or ``''`` when the element is not on the page.
    """
    url = c['href']
    driver.get('https://www.linkedin.com/in/' + url + '/overlay/contact-info/')
    # Fixed wait for the overlay to render before looking for elements.
    time.sleep(3)

    email = ''
    phone = ''

    # Only a missing element means "no e-mail"; any other failure (for example
    # a dead browser session) must propagate so the caller does not record an
    # empty value for a profile that was never actually inspected.
    try:
        e_email = driver.find_element(
            By.XPATH, ".//a[contains(@href,'mailto:') ]")
    except NoSuchElementException:
        print('no email')
    else:
        email = e_email.text
        print(email)
        print(e_email.get_attribute('href'))

    try:
        e_phone = driver.find_element(
            By.XPATH,
            "//section[@class='pv-contact-info__contact-type ci-phone' ]"
            "//li[contains(@class,'pv-contact-info__ci-container')]")
    except NoSuchElementException:
        print('no phone')
    else:
        phone = e_phone.text
        print(phone)

    return {'email': email, 'phone': phone}


def restart_browser():
    """Start a local Chrome session on the existing user profile.

    Returns:
        A ``webdriver.Chrome`` instance with its window sized to 1400x1000.
    """
    # NOTE: earlier revisions carried commented-out alternatives here:
    # restarting the ``p4444`` docker container, a SOCKS5 proxy argument, and
    # a ``webdriver.Remote`` session against http://127.0.0.1:4444/wd/hub.
    options = webdriver.ChromeOptions()
    options.add_argument("start-maximized")
    options.add_argument('user-data-dir=C:\\Users\\jared\\AppData\\Local\\Google\\Chrome\\User Data')
    options.add_argument('--profile-directory=Default')
    # ``options=`` is accepted by both Selenium 3.8+ and Selenium 4, whereas
    # ``desired_capabilities=`` was removed in later Selenium 4 releases.
    driver = webdriver.Chrome(options=options)
    driver.set_window_size(1400, 1000)
    return driver


# NOTE(security): the connection URL embeds credentials. It stays as the
# default so existing runs behave the same, but it can be overridden through
# the LINKEDIN_DB_URL environment variable without editing the file.
db = dataset.connect(os.environ.get(
    'LINKEDIN_DB_URL',
    'mysql://choozmo:pAssw0rd@db.ptt.cx:3306/seo?charset=utf8mb4'))

# Materialize the pending rows up front so the result cursor is not held open
# while the same connection issues UPDATE statements.
lst = list(db.query(
    'select id,href from linkedin_list where email is null order by rand()'))

driver = restart_browser()
try:
    for c in lst:
        data = process_query(driver, c)
        # Scraped page text goes through bound parameters: concatenating it
        # into the statement breaks on any quote character and lets page
        # content inject SQL.
        db.query(
            'update linkedin_list set email=:email, phone=:phone '
            'where id=:row_id',
            email=data['email'], phone=data['phone'], row_id=c['id'])
        print(data)
        time.sleep(2)
    # Kept from the original: hold the process (and browser) open after the
    # last profile -- presumably for manual inspection.
    time.sleep(9999)
finally:
    # Always release the Chrome/chromedriver processes, even on failure.
    driver.quit()