|
@@ -0,0 +1,111 @@
|
|
|
+import traceback
|
|
|
+from selenium import webdriver
|
|
|
+from selenium.webdriver.common.desired_capabilities import DesiredCapabilities
|
|
|
+import time
|
|
|
+import os
|
|
|
+import datetime
|
|
|
+import urllib.parse
|
|
|
+from selenium.webdriver.support.ui import WebDriverWait
|
|
|
+from selenium.webdriver.common.by import By
|
|
|
+from selenium.webdriver.support import expected_conditions as EC
|
|
|
+from selenium.webdriver.common.keys import Keys
|
|
|
+import codecs
|
|
|
+import random
|
|
|
+from bs4 import BeautifulSoup
|
|
|
+import requests
|
|
|
+import time
|
|
|
+import rpyc
|
|
|
+import sys
|
|
|
+import docker
|
|
|
+import googlesearch
|
|
|
+import codecs
|
|
|
+import sys
|
|
|
+import time
|
|
|
+import dataset
|
|
|
+import os
|
|
|
+
|
|
|
+
|
|
|
def scrolling(driver, pgnum):
    """Scroll the current page by sending PAGE_DOWN to its <body> element.

    Args:
        driver: Selenium WebDriver whose current page should be scrolled.
        pgnum: Number of PAGE_DOWN key presses to send.

    When more than one press is requested, the function sleeps 0.3 s after
    every press (presumably so lazily loaded content has time to render).
    """
    # find_element(By.<strategy>, ...) is available on Selenium 3 and 4; the
    # find_element_by_* shortcuts were removed in Selenium 4.3.
    body = driver.find_element(By.CSS_SELECTOR, 'body')
    pause_between_presses = pgnum > 1  # loop-invariant, so decide it once
    for _ in range(pgnum):
        body.send_keys(Keys.PAGE_DOWN)
        if pause_between_presses:
            time.sleep(0.3)
|
|
|
+
|
|
|
+
|
|
|
def _profile_slug(href):
    """Return the last path segment of *href*, ignoring any query string."""
    path = href.split('?', 1)[0]
    # Strip a trailing slash first so '.../in/foo/' yields 'foo', not ''.
    return path.rstrip('/').rsplit('/', 1)[-1]


def process_one(driver):
    """Extract one record per search-result card on the current page.

    Args:
        driver: Selenium WebDriver positioned on a results page.

    Returns:
        A list of dicts with the keys ``'name'`` (first line of the result
        link's text), ``'title'`` (text of the primary-subtitle element, or
        ``''`` when the card has none) and ``'href'`` (last path segment of
        the link target with the query string removed).

    Cards without an ``app-aware-link`` anchor, or whose anchor has no
    ``href``, are skipped instead of aborting the whole page.
    """
    records = []
    cards = driver.find_elements(
        By.XPATH, "//div[contains(@class,'entity-result__content') ]")

    for card in cards:
        # find_elements returns [] rather than raising when nothing matches,
        # so a single incomplete card cannot break the page.
        links = card.find_elements(By.XPATH, ".//a[@class='app-aware-link']")
        if not links:
            continue
        link = links[0]

        href = link.get_attribute('href')
        if not href:
            continue
        print(href)

        name = link.text.split('\n')[0]
        subtitles = card.find_elements(
            By.XPATH,
            ".//div[contains(@class,'entity-result__primary-subtitle') ]")
        title = subtitles[0].text if subtitles else ''

        records.append(
            {'name': name, 'title': title, 'href': _profile_slug(href)})
    return records
|
|
|
+
|
|
|
def process_query(driver, url):
    """Open *url* and store every page of results in ``linkedin_list``.

    For each page the function waits one second, scrolls down ten times,
    collects the records with ``process_one`` and inserts each of them into
    the ``linkedin_list`` table of the module-level ``db``. It then presses
    ENTER on the pagination "Next" button and repeats.

    Args:
        driver: Selenium WebDriver used for navigation.
        url: Address of the first results page.

    Returns:
        None. The loop ends when the Next button is disabled, cannot be
        found, or cannot be activated; in the latter two cases the traceback
        is printed before returning.
    """
    driver.get(url)
    table = db['linkedin_list']  # read-only use of the module-level handle

    while True:
        time.sleep(1)
        scrolling(driver, 10)

        rows = process_one(driver)
        print(rows)
        for row in rows:
            table.insert(row)

        try:
            button = driver.find_element(
                By.XPATH,
                "//button[contains(@aria-label,'Next') and contains(@class,'artdeco-pagination__button--next')]")
            print(button.text)
            if not button.is_enabled():
                # A disabled Next button means there is no further page;
                # stop instead of re-processing the same page.
                print('next button disabled')
                return None
            button.send_keys(Keys.ENTER)

            print('next click')
        except Exception:
            # Exception (not a bare except) so Ctrl-C / SystemExit still
            # propagate; any Selenium failure here ends the pagination.
            traceback.print_exc()
            print('pnnext exception')
            return None
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
def restart_browser(
        user_data_dir='C:\\Users\\jared\\AppData\\Local\\Google\\Chrome\\User Data',
        profile_directory='Default'):
    """Start a local Chrome session that uses an existing user profile.

    Args:
        user_data_dir: Chrome "User Data" directory passed to the browser
            via the ``user-data-dir`` argument.
        profile_directory: Profile folder inside ``user_data_dir`` passed
            via ``--profile-directory``.

    Returns:
        The new ``webdriver.Chrome`` instance, started maximized and then
        resized to 1400x1000.

    Note:
        Despite the name, this only starts a browser; it does not stop a
        previously running one.
    """
    options = webdriver.ChromeOptions()
    options.add_argument("start-maximized")
    options.add_argument('user-data-dir=' + user_data_dir)
    options.add_argument('--profile-directory=' + profile_directory)

    # Pass the options object directly: the desired_capabilities keyword was
    # removed from webdriver.Chrome in Selenium 4.10, while options= is
    # accepted by both Selenium 3.8+ and 4.
    driver = webdriver.Chrome(options=options)
    driver.set_window_size(1400, 1000)
    return driver
|
|
|
+
|
|
|
# SECURITY: the fallback URL embeds database credentials in source control.
# Set LINKEDIN_DB_URL to override it; the literal is kept only so existing
# runs behave as before and should be removed once the variable is deployed.
db = dataset.connect(os.environ.get(
    'LINKEDIN_DB_URL',
    'mysql://choozmo:pAssw0rd@db.ptt.cx:3306/seo?charset=utf8mb4'))


driver = restart_browser()
url = 'https://www.linkedin.com/search/results/people/?keywords=ceo&network=%5B%22F%22%5D&origin=FACETED_SEARCH&position=1&searchId=74911542-66f8-406d-9fc9-e0d1a9cd5045&sid=YUu'

try:
    process_query(driver, url)

    # Long idle after the scrape finishes (presumably to keep the browser
    # window open for manual inspection).
    time.sleep(9999)
finally:
    # Always shut the browser and its driver process down, including on
    # errors or Ctrl-C, so the Chrome profile is not left locked.
    driver.quit()
|