Jared 3 jaren geleden
bovenliggende
commit
8a8e7910fa
1 gewijzigde bestanden met toevoegingen van 111 en 0 verwijderingen
  1. 111 0
      choozmo/linkedin_auto.py

+ 111 - 0
choozmo/linkedin_auto.py

@@ -0,0 +1,111 @@
+import traceback
+from selenium import webdriver
+from selenium.webdriver.common.desired_capabilities import DesiredCapabilities
+import time
+import os
+import datetime
+import urllib.parse
+from selenium.webdriver.support.ui import WebDriverWait
+from selenium.webdriver.common.by import By
+from selenium.webdriver.support import expected_conditions as EC
+from selenium.webdriver.common.keys import Keys
+import codecs
+import random
+from bs4 import BeautifulSoup
+import requests
+import time
+import rpyc
+import sys
+import docker
+import  googlesearch
+import codecs
+import sys
+import time
+import dataset
+import os
+
+
+def scrolling(driver,pgnum):
+    ub = driver.find_element_by_css_selector('body')
+    for i in range(pgnum):
+        ub.send_keys(Keys.PAGE_DOWN)
+        if pgnum>1:
+            time.sleep(0.3)
+
+
+def process_one(driver):
+    lst=[]
+#    elmts=driver.find_elements_by_xpath("//span[contains(@class,'entity-result__title-text') ]")
+    elmts=driver.find_elements_by_xpath("//div[contains(@class,'entity-result__content') ]")
+
+
+    for elmt in elmts:
+        e_link=elmt.find_element_by_xpath(".//a[@class='app-aware-link']")
+        href=e_link.get_attribute('href')
+        print(href)
+        partial=href.split('?')
+        partial2=partial[0].split('/')
+        href=partial2[-1]
+#        print(e_link.get_attribute('href'))
+        name=e_link.text.split('\n')[0]
+#        print(name)
+        e_title=elmt.find_element_by_xpath(".//div[contains(@class,'entity-result__primary-subtitle') ]")
+#        print(e_title.text)
+        lst.append({'name':name,'title':e_title.text,'href':href})
+    return lst
+
+def process_query(driver,url):
+    global db
+    driver.get(url)
+    table=db['linkedin_list']
+    while True:
+        time.sleep(1)
+        scrolling(driver,10)
+
+        lst=process_one(driver)
+        print(lst)
+        for l in lst:
+            table.insert(l)
+
+        try:
+            button=driver.find_element_by_xpath("//button[contains(@aria-label,'Next') and contains(@class,'artdeco-pagination__button--next')]")
+#            webdriver.ActionChains(driver).move_to_element(button).perform()
+#            webdriver.ActionChains(driver).move_to_element(button).click().perform()
+            print(button.text)
+            button.send_keys(Keys.ENTER)
+
+            print('next click')
+        except:
+            traceback.print_exc()
+            print('pnnext exception')
+            return None
+
+
+
+def restart_browser():
+#    os.system('docker container restart p4444')
+#    time.sleep(10)
+
+    options = webdriver.ChromeOptions()
+#    options.add_argument("--proxy-server=socks5://130.61.93.198:1080")
+    options.add_argument("start-maximized")
+    options.add_argument('user-data-dir=C:\\Users\\jared\\AppData\\Local\\Google\\Chrome\\User Data')
+    options.add_argument('--profile-directory=Default')
+
+    driver=webdriver.Chrome(desired_capabilities=options.to_capabilities())
+    #driver = webdriver.Remote(
+    #    command_executor='http://127.0.0.1:4444/wd/hub',
+    #desired_capabilities=options.to_capabilities())
+#    desired_capabilities=DesiredCapabilities.CHROME)
+    driver.set_window_size(1400,1000)
+    return driver
+
+db = dataset.connect('mysql://choozmo:pAssw0rd@db.ptt.cx:3306/seo?charset=utf8mb4')
+
+
+driver=restart_browser()
+url='https://www.linkedin.com/search/results/people/?keywords=ceo&network=%5B%22F%22%5D&origin=FACETED_SEARCH&position=1&searchId=74911542-66f8-406d-9fc9-e0d1a9cd5045&sid=YUu'
+
+process_query(driver,url)
+
+time.sleep(9999)