12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091 |
- import traceback
- from selenium import webdriver
- from selenium.webdriver.common.desired_capabilities import DesiredCapabilities
- import time
- import os
- import datetime
- import urllib.parse
- from selenium.webdriver.support.ui import WebDriverWait
- from selenium.webdriver.common.by import By
- from selenium.webdriver.support import expected_conditions as EC
- from selenium.webdriver.common.keys import Keys
- import codecs
- import random
- from bs4 import BeautifulSoup
- import requests
- import time
- import rpyc
- import sys
- import docker
- import googlesearch
- import codecs
- import sys
- import time
- import dataset
- import os
def scrolling(driver, pgnum):
    """Scroll the current page down by sending PAGE_DOWN key presses.

    Args:
        driver: Selenium WebDriver whose current page should be scrolled.
        pgnum: Number of PAGE_DOWN key presses to send to the ``<body>``
            element. Zero or a negative value sends nothing.
    """
    # find_element(By..., ...) works on Selenium 3 and 4; the older
    # find_element_by_* helpers were removed in Selenium 4.3.
    body = driver.find_element(By.CSS_SELECTOR, 'body')
    for _ in range(pgnum):
        body.send_keys(Keys.PAGE_DOWN)
        # NOTE(review): the original indentation was lost in the paste; the
        # pause is assumed to belong inside the loop (one pause per key press
        # when more than one press is requested) - confirm.
        if pgnum > 1:
            time.sleep(0.3)
def process_query(driver, c):
    """Open a profile's contact-info overlay and read its email and phone.

    Args:
        driver: Selenium WebDriver used to load the page.
        c: Mapping with an ``'href'`` entry that is inserted into the
            ``https://www.linkedin.com/in/<href>/overlay/contact-info/`` URL
            (presumably the profile slug - verify against the
            ``linkedin_list`` table).

    Returns:
        A dict ``{'email': str, 'phone': str}``; a value is the empty string
        when the corresponding element is not present on the page.
    """
    # Imported locally so this function brings its own dependency into scope.
    from selenium.common.exceptions import NoSuchElementException

    driver.get('https://www.linkedin.com/in/' + c['href'] + '/overlay/contact-info/')
    # Fixed pause to give the overlay time to render before querying the DOM.
    time.sleep(3)

    email = ''
    phone = ''

    # Only a missing element means "no email"; other failures (and Ctrl-C)
    # are allowed to propagate instead of being silently swallowed.
    try:
        e_email = driver.find_element(By.XPATH, ".//a[contains(@href,'mailto:') ]")
    except NoSuchElementException:
        print('no email')
    else:
        email = e_email.text
        print(email)
        print(e_email.get_attribute('href'))

    try:
        e_phone = driver.find_element(
            By.XPATH,
            "//section[@class='pv-contact-info__contact-type ci-phone' ]//li[contains(@class,'pv-contact-info__ci-container')]",
        )
    except NoSuchElementException:
        print('no phone')
    else:
        phone = e_phone.text
        print(phone)

    return {'email': email, 'phone': phone}
def restart_browser():
    """Start a local Chrome session that reuses an existing Chrome profile.

    The browser is launched with the ``Default`` profile from the hard-coded
    ``user-data-dir`` below and resized to 1400x1000.

    Returns:
        The newly created ``webdriver.Chrome`` instance.
    """
    # os.system('docker container restart p4444')
    # time.sleep(10)
    options = webdriver.ChromeOptions()
    # options.add_argument("--proxy-server=socks5://130.61.93.198:1080")
    options.add_argument("start-maximized")
    options.add_argument('user-data-dir=C:\\Users\\jared\\AppData\\Local\\Google\\Chrome\\User Data')
    options.add_argument('--profile-directory=Default')
    # Pass the options object directly: the desired_capabilities keyword is
    # deprecated and no longer accepted by current Selenium 4 releases.
    driver = webdriver.Chrome(options=options)
    # Alternative kept for reference: run against a remote Selenium hub.
    # driver = webdriver.Remote(
    #     command_executor='http://127.0.0.1:4444/wd/hub',
    #     options=options)
    driver.set_window_size(1400, 1000)
    return driver
# NOTE(review): the database credentials are hard-coded in this URL; consider
# loading them from the environment or a config file kept out of version control.
db = dataset.connect('mysql://choozmo:pAssw0rd@db.ptt.cx:3306/seo?charset=utf8mb4')

# Read every pending row (email still NULL) up front, in random order, so the
# result set is fully consumed before the browser work begins.
lst = list(db.query('select id,href from linkedin_list where email is null order by rand()'))

driver = restart_browser()
for c in lst:
    data = process_query(driver, c)
    # Bound parameters instead of string concatenation: the values come from
    # scraped page text and may contain quotes or SQL fragments.
    db.query(
        'update linkedin_list set email=:email, phone=:phone where id=:id',
        email=data['email'],
        phone=data['phone'],
        id=str(c['id']),
    )
    print(data)
    # Short pause between profiles.
    time.sleep(2)
# Long final sleep keeps the process (and the browser session) alive after the run.
time.sleep(9999)
|