linkedin_detail.py 2.7 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091
  1. import traceback
  2. from selenium import webdriver
  3. from selenium.webdriver.common.desired_capabilities import DesiredCapabilities
  4. import time
  5. import os
  6. import datetime
  7. import urllib.parse
  8. from selenium.webdriver.support.ui import WebDriverWait
  9. from selenium.webdriver.common.by import By
  10. from selenium.webdriver.support import expected_conditions as EC
  11. from selenium.webdriver.common.keys import Keys
  12. import codecs
  13. import random
  14. from bs4 import BeautifulSoup
  15. import requests
  16. import time
  17. import rpyc
  18. import sys
  19. import docker
  20. import googlesearch
  21. import codecs
  22. import sys
  23. import time
  24. import dataset
  25. import os
  26. def scrolling(driver,pgnum):
  27. ub = driver.find_element_by_css_selector('body')
  28. for i in range(pgnum):
  29. ub.send_keys(Keys.PAGE_DOWN)
  30. if pgnum>1:
  31. time.sleep(0.3)
  32. def process_query(driver,c):
  33. global db
  34. url=c['href']
  35. driver.get('https://www.linkedin.com/in/'+url+'/overlay/contact-info/')
  36. time.sleep(3)
  37. email=''
  38. phone=''
  39. try:
  40. e_email=driver.find_element_by_xpath(".//a[contains(@href,'mailto:') ]")
  41. print(e_email.text)
  42. email=e_email.text
  43. print(e_email.get_attribute('href'))
  44. except:
  45. print('no email')
  46. try:
  47. e_phone=driver.find_element_by_xpath("//section[@class='pv-contact-info__contact-type ci-phone' ]//li[contains(@class,'pv-contact-info__ci-container')]")
  48. print(e_phone.text)
  49. phone=e_phone.text
  50. except:
  51. print('no phone')
  52. return {'email':email,'phone':phone}
  53. def restart_browser():
  54. # os.system('docker container restart p4444')
  55. # time.sleep(10)
  56. options = webdriver.ChromeOptions()
  57. # options.add_argument("--proxy-server=socks5://130.61.93.198:1080")
  58. options.add_argument("start-maximized")
  59. options.add_argument('user-data-dir=C:\\Users\\jared\\AppData\\Local\\Google\\Chrome\\User Data')
  60. options.add_argument('--profile-directory=Default')
  61. driver=webdriver.Chrome(desired_capabilities=options.to_capabilities())
  62. #driver = webdriver.Remote(
  63. # command_executor='http://127.0.0.1:4444/wd/hub',
  64. #desired_capabilities=options.to_capabilities())
  65. # desired_capabilities=DesiredCapabilities.CHROME)
  66. driver.set_window_size(1400,1000)
  67. return driver
  68. db = dataset.connect('mysql://choozmo:pAssw0rd@db.ptt.cx:3306/seo?charset=utf8mb4')
  69. lst=[]
  70. cursor=db.query('select id,href from linkedin_list where email is null order by rand()')
  71. for c in cursor:
  72. lst.append(c)
  73. driver=restart_browser()
  74. for c in lst:
  75. data=process_query(driver,c)
  76. db.query('update linkedin_list set email="'+data['email']+'", phone="'+data['phone']+'" where id="'+str(c['id'])+'"')
  77. print(data)
  78. time.sleep(2)
  79. time.sleep(9999)