| 
					
				 | 
			
			
				@@ -0,0 +1,157 @@ 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+import time 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+import json 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+from selenium import webdriver 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+from selenium.webdriver.common.desired_capabilities import DesiredCapabilities 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+import time 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+import os 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+import urllib.parse 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+from selenium.webdriver.support.ui import WebDriverWait 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+from selenium.webdriver.common.by import By 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+from selenium.webdriver.chrome.service import Service 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+from selenium.webdriver.common.keys import Keys 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+from selenium.webdriver.support import expected_conditions as EC 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+import codecs 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+import random 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+import requests 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+import datetime 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+import dataset 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+import time 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+import traceback 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+import sys 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+import fire 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+import redis 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+ 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+ 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+driver = None 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+ 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+ 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+def rua(): 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+    pool = [ 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+        "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:73.0) Gecko/20100101 Firefox/73.0", 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+        "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:76.0) Gecko/20100101 Firefox/76.0", 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/87.0.4280.88 Safari/537.36" 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/60.0.3112.101 Safari/537.36", 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+        "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/78.0.3904.108 Safari/537.36", 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/83.0.4103.97 Safari/537.36", 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/81.0.4044.138 Safari/537.36 OPR/68.0.3618.125", 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+    ] 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+    return random.choice(pool) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+ 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+ 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+def empty_query(q): 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+    global driver 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+    googleurl='https://www.google.com/search?q='+urllib.parse.quote(q) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+    driver.get(googleurl) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+    time.sleep(3) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+ 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+ 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+def process_query(qs): 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+    db = dataset.connect('mysql://choozmo:pAssw0rd@db.ptt.cx:3306/seo?charset=utf8mb4') 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+    table=db['general_log'] 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+    q=qs[0] 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+    domain=qs[1] 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+    global driver 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+    googleurl = 'https://www.google.com/?num=100' 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+    driver.get(googleurl) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+    time.sleep(6) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+    send_kw_elmt = driver.find_element(By.XPATH, '/html/body/div[1]/div[3]/form/div[1]/div[1]/div[1]/div/div[2]/input') 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+    send_kw_elmt.send_keys(q) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+    time.sleep(3) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+    send_kw_elmt.send_keys(Keys.ENTER) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+    time.sleep(6) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+    print(driver.current_url) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+    elmts=driver.find_elements(By.XPATH,"//div[@class='yuRUbf']/a") 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+ 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+    idx=1 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+    ranking=-1 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+    print('網頁數量',len(elmts)) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+#    driver.save_screenshot('c:/tmp/test.png') 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+    if 'site' in q: 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+        href = elmts[0].get_attribute('href') 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+        txt = elmts[0].text 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+        print('clicked....') 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+        print(href) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+        print(txt) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+        print("ranking", idx) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+        table.insert( 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+            {'kw': q, 'domain': domain, 'ranking': idx, 'title': txt, 'url': href, 'dt': datetime.datetime.now()}) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+        webdriver.ActionChains(driver).move_to_element(elmts[0]).perform() 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+        webdriver.ActionChains(driver).move_to_element(elmts[0]).click().perform() 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+        time.sleep(5) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+    for elmt in elmts: 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+        href=elmt.get_attribute('href') 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+        txt=elmt.text 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+        if len(txt)>10: 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+            if domain in href: 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                print('clicked....') 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                print('點擊網址',href) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                print('標題',txt) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                print("ranking", idx) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                table.insert({'kw':q,'domain':domain,'ranking':idx,'title':txt,'url':href,'dt':datetime.datetime.now()}) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                webdriver.ActionChains(driver).move_to_element(elmt).perform() 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                webdriver.ActionChains(driver).move_to_element(elmt).click().perform() 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                time.sleep(5) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                break 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+        idx+=1 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+    db.close() 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+def run_once(q): 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+    global driver 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+    result=[] 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+    s = Service('/root/driver/chromedriver') 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+    user_agent = rua() 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+    options = webdriver.ChromeOptions() 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+    options.add_argument('--headless') 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+    options.add_argument('--remote-debugging-port=9222') 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+    options.add_experimental_option("debuggerAddress", "127.0.0.1:9922") 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+    # options.add_argument("--user-agent=" +user_agent) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+    options.add_argument("--incognito") 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+    r = redis.Redis(host='db.ptt.cx', port=6379, db=2,password='choozmo9') 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+    data=r.get('google_proxy') 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+    jstext=data.decode('utf-8') 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+    jsobj=json.loads(jstext) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+    proxy=random.choice(jsobj) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+    print('Freeproxy',proxy) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+    change_ip = ["'--proxy-server='+proxy","--proxy-server=socks5://127.0.0.1:9050","--proxy-server=socks5://192.53.174.202:8180"] 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+    options.add_argument('--proxy-server=socks5://192.53.174.202:8180') 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+    driver = webdriver.Chrome( 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+    options=options,service=s) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+    if 'sorry' in driver.current_url: 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+        r = redis.Redis(host='db.ptt.cx', port=6379, db=2,password='choozmo9') 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+        data=r.get('google_proxy') 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+        jstext=data.decode('utf-8') 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+        jsobj=json.loads(jstext) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+        print('Free proxy',jsobj) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+        proxy=random.choice(jsobj) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+        change_ip_list = ['--proxy-server=%s' % proxy,"--proxy-server=socks5://127.0.0.1:9050","--proxy-server=socks5://192.53.174.202:8180"] 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+        change_ip = random.choice(change_ip_list) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+        options.add_argument(change_ip) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+        print('使用代理ip',change_ip) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+         
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+    driver = webdriver.Chrome(options=options,service=s) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+ 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+    driver.delete_all_cookies() 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+    driver.set_window_size(1400,1000) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+ 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+    process_query(q) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+    time.sleep(3) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+    driver.quit() 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+ 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+ 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+#for c in lst: 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+#while True: 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+#    try: 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+#        c=random.choice(lst) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+#    except: 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+#        traceback.print_exc() 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+#    sleepint=random.randint(320,520) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+#    time.sleep(sleepint) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+ 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+class JParams(object): 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+ 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+  def get(self, kw,domain,port): 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+    run_once( (kw,domain,port) ) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+ 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+ 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+if __name__ == '__main__': 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+  fire.Fire(JParams) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+ 
			 |