| 
					
				 | 
			
			
				@@ -0,0 +1,111 @@ 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+import traceback 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+from selenium import webdriver 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+from selenium.webdriver.common.desired_capabilities import DesiredCapabilities 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+import time 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+import os 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+import datetime 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+import urllib.parse 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+from selenium.webdriver.support.ui import WebDriverWait 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+from selenium.webdriver.common.by import By 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+from selenium.webdriver.support import expected_conditions as EC 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+from selenium.webdriver.common.keys import Keys 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+import codecs 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+import random 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+from bs4 import BeautifulSoup 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+import requests 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+import time 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+import rpyc 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+import sys 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+import docker 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+import  googlesearch 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+import codecs 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+import sys 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+import time 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+import dataset 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+import os 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+ 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+ 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				def scrolling(driver, pgnum):
				    """Scroll the current page by sending PAGE_DOWN to <body> ``pgnum`` times.

				    Args:
				        driver: Selenium WebDriver whose current page is scrolled.
				        pgnum: Number of PAGE_DOWN key presses to send.

				    When more than one key press is requested, the function sleeps 0.3
				    seconds after each press (presumably so lazily loaded content can
				    render -- confirm against the target page).
				    """
				    # find_element(By..., ...) replaces find_element_by_css_selector, which
				    # Selenium 4 deprecated and later removed; it also works on Selenium 3.
				    body = driver.find_element(By.CSS_SELECTOR, 'body')
				    pause_between_presses = pgnum > 1
				    for _ in range(pgnum):
				        body.send_keys(Keys.PAGE_DOWN)
				        if pause_between_presses:
				            time.sleep(0.3)
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+ 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+ 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				def process_one(driver):
				    """Collect name, title and profile slug for each result card on the page.

				    Args:
				        driver: Selenium WebDriver currently showing a search results page.

				    Returns:
				        A list of dicts with the keys 'name', 'title' and 'href', one per
				        result card that exposes a profile link. 'href' holds only the last
				        path segment of the link URL, with the query string removed.
				    """
				    results = []
				    cards = driver.find_elements(
				        By.XPATH, "//div[contains(@class,'entity-result__content') ]")

				    for card in cards:
				        # find_elements returns an empty list instead of raising, so a
				        # single card without a link no longer aborts the whole page.
				        links = card.find_elements(By.XPATH, ".//a[@class='app-aware-link']")
				        if not links:
				            continue
				        link = links[0]

				        url = link.get_attribute('href')
				        print(url)
				        if not url:
				            # get_attribute returns None when the attribute is absent.
				            continue

				        # Drop the query string, then keep the final path segment; the
				        # rstrip keeps a trailing '/' from producing an empty slug.
				        slug = url.split('?', 1)[0].rstrip('/').rsplit('/', 1)[-1]

				        # Only the first line of the link text is used as the name.
				        name = link.text.split('\n')[0]

				        subtitles = card.find_elements(
				            By.XPATH,
				            ".//div[contains(@class,'entity-result__primary-subtitle') ]")
				        title = subtitles[0].text if subtitles else ''

				        results.append({'name': name, 'title': title, 'href': slug})
				    return results
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+ 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				def process_query(driver, url):
				    """Open ``url`` and store every result page in the 'linkedin_list' table.

				    For each page: wait one second, scroll down ten times, extract the
				    rows with process_one(), insert them one by one, then activate the
				    pagination "Next" button. The loop ends when that button is missing,
				    disabled, or cannot be activated.

				    Args:
				        driver: Selenium WebDriver used for navigation.
				        url: Search results URL to start from.

				    Returns:
				        None, always.
				    """
				    driver.get(url)
				    # db is the module-level dataset connection; reading it needs no
				    # ``global`` declaration.
				    table = db['linkedin_list']

				    while True:
				        time.sleep(1)
				        scrolling(driver, 10)

				        rows = process_one(driver)
				        print(rows)
				        for row in rows:
				            table.insert(row)

				        try:
				            button = driver.find_element(
				                By.XPATH,
				                "//button[contains(@aria-label,'Next') and contains(@class,'artdeco-pagination__button--next')]")
				            print(button.text)
				            if not button.is_enabled():
				                # A disabled Next button means there is no further page;
				                # pressing it would leave us re-reading the same page.
				                print('no more pages')
				                return None
				            button.send_keys(Keys.ENTER)

				            print('next click')
				        except Exception:
				            # Exception (not a bare except) so Ctrl-C and SystemExit still
				            # propagate; any lookup/interaction failure ends pagination.
				            traceback.print_exc()
				            print('pnnext exception')
				            return None
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+ 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+ 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+ 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				def restart_browser(
				        user_data_dir='C:\\Users\\jared\\AppData\\Local\\Google\\Chrome\\User Data',
				        profile_directory='Default'):
				    """Start a local Chrome session that reuses an existing Chrome profile.

				    Despite the name, this only launches a new browser; it does not stop
				    a previously started one.

				    Args:
				        user_data_dir: Chrome "User Data" directory passed as
				            ``user-data-dir``. Defaults to the path the script was written
				            against.
				        profile_directory: Profile folder inside ``user_data_dir`` passed
				            as ``--profile-directory``.

				    Returns:
				        The new Chrome WebDriver, resized to 1400x1000.
				    """
				    options = webdriver.ChromeOptions()
				    options.add_argument("start-maximized")
				    options.add_argument('user-data-dir=' + user_data_dir)
				    options.add_argument('--profile-directory=' + profile_directory)

				    # Pass the options object directly: the desired_capabilities keyword
				    # is deprecated in Selenium 4 and no longer accepted by newer releases.
				    driver = webdriver.Chrome(options=options)
				    driver.set_window_size(1400, 1000)
				    return driver
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+ 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				# Module-level database handle used by process_query().
				# NOTE(review): the fallback URL embeds credentials in source control.
				# Prefer setting LINKEDIN_DB_URL in the environment, and consider rotating
				# this password and removing the default.
				db = dataset.connect(os.environ.get(
				    'LINKEDIN_DB_URL',
				    'mysql://choozmo:pAssw0rd@db.ptt.cx:3306/seo?charset=utf8mb4'))


				driver = restart_browser()
				url = 'https://www.linkedin.com/search/results/people/?keywords=ceo&network=%5B%22F%22%5D&origin=FACETED_SEARCH&position=1&searchId=74911542-66f8-406d-9fc9-e0d1a9cd5045&sid=YUu'

				try:
				    process_query(driver, url)

				    # Keep the process (and the browser window) alive for 9999 seconds
				    # after scraping finishes.
				    time.sleep(9999)
				finally:
				    # Always shut down Chrome and its driver process, even on errors or
				    # Ctrl-C, instead of leaving them running.
				    driver.quit()
			 |