| 
					
				 | 
			
			
				@@ -2,14 +2,14 @@ import random 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				 import sys 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				 import dataset 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				 from selenium import webdriver 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+from selenium.webdriver.chrome.service import Service 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+from selenium.webdriver.common.by import By 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				 import traceback 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				 import datetime 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				 import codecs 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				 import time 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				 import urllib 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				 import argparse 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-import schedule 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				- 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				 import logging 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				 import sys 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				 from logging.handlers import SysLogHandler 
			 | 
		
	
	
		
			
				| 
					
				 | 
			
			
				@@ -17,100 +17,87 @@ import socket 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				 import pandas as pd 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				 import socket 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				 import os 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-_LOG_SERVER = ('hhh.ptt.cx', 514) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-logger = logging.getLogger('clickbot_100') 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-handler1 = SysLogHandler(address=_LOG_SERVER,socktype=socket.SOCK_DGRAM) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-logger.addHandler(handler1) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-#logger.debug('[clickbot_100][清原]begin') 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				- 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-hname=socket.gethostname() 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-pid=str(os.getpid()) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-logger.fatal('[clickbot_100]['+hname+']['+pid+']begin') 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				- 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+import dataset 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+import pymysql 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+pymysql.install_as_MySQLdb() 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				  
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+driver = None 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				 def restart_browser(): 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+    global driver 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+    user_agent = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/87.0.4280.88 Safari/537.36' 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+    s = Service('C:\/Users\/s1301\/Downloads\/chromedriver_107\/chromedriver') 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				     options = webdriver.ChromeOptions() 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-    options.add_argument('--headless')  
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-    driver=webdriver.Chrome(options=options) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-    driver.set_window_size(950,6000) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+    options.add_argument('--headless') 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+    # options.add_argument('--remote-debugging-port=9222') 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+    # options.add_experimental_option("debuggerAddress", "192.168.192.45:9922") 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+    options.add_argument("--user-agent=" + user_agent) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+    options.add_argument("--incognito") 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+    # options.add_argument('--proxy-server=socks5://172.104.93.163:41800') 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+    driver = webdriver.Chrome( 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+        options=options, service=s) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+    str1 = driver.capabilities['browserVersion'] 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+    print('版本', str1) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+    driver.delete_all_cookies() 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+    driver.set_window_size(1400, 20000) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				     return driver 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				  
			 | 
		
	
		
			
				 | 
				 | 
			
			
				- 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				 def process_one(): 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				     db = dataset.connect('mysql://choozmo:pAssw0rd@db.ptt.cx:3306/seo?charset=utf8mb4') 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-    lst=[] 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+    lst=['123'] 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				     table=db['google_rank'] 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-    cursor = db.query('select term from seo.selected_kw') 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+    # cursor = db.query('select term from seo.selected_kw where client="鑫富"') 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+    # cursor = db.query('select term from seo.selected_kw where id between 1902 and 1923') 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				     # cursor=db.query('select term from selected_kw and term not in (SELECT distinct(keyword) FROM ig_tags.save_result where url like "%beastparadise.net%" and datediff(now(),dt)=0)') 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-    for c in cursor: 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-        lst.append(c['term']) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				- 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-    term=random.choice(lst) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-    print(term) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-    logger.debug('[clickbot_100]['+term+']') 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-    driver=restart_browser() 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-    escaped_search_term=urllib.parse.quote(term) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-    googleurl = 'https://www.google.com/search?q={}&num={}&hl={}'.format(escaped_search_term, 100,'zh-TW') 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				- 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-    print(googleurl) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-    driver.get(googleurl) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-    time.sleep(6) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-    fname=term.replace(' ','_') 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-    # driver.save_screenshot('c:/tmp/seo/'+fname+'.png') 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-    df=pd.DataFrame() 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-    # driver.get_screenshot_as_file("/Users/zooeytsai/排名100.png") 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-    elmts=driver.find_elements_by_xpath("//div[@class='yuRUbf']/a") 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+    # for c in cursor: 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+    #     lst.append(c['term']) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+    db.close() 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+    domain = 'vickybrain.com' 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+    for term in lst: 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+        print(term) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+        driver=restart_browser() 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+        escaped_search_term=urllib.parse.quote(term) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+        googleurl = 'https://www.google.com/search?q={}&num={}&hl={}'.format(escaped_search_term, 100,'zh-TW') 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+        driver.get(googleurl) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+        time.sleep(60) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+        print(driver.current_url) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+        driver.save_screenshot('C:\/Users\/s1301\/Pictures\/Saved Pictures\/鑫富\/'+term+'.png') 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+        df=pd.DataFrame() 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+        elmts=driver.find_elements(By.XPATH,"//div[@class='yuRUbf']/a") 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+        print('結果數量',len(elmts)) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+        cnt=1 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+        datadict={'搜尋詞':[],'結果標題':[],'結果網址':[],'結果名次':[]} 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+ 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+        for elmt in elmts: 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+            try: 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                href=elmt.get_attribute('href') 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                if domain in href: 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                    # table.insert({'title':elmt.text,'url':href,'keyword':term,'dt':datetime.datetime.now(),'ranking':cnt}) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                    print(href) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                    print(elmt.text) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                datadict['搜尋詞'].append(term) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                datadict['結果標題'].append(elmt.text) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                datadict['結果網址'].append(href) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                datadict['結果名次'].append(str(cnt)) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+ 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                cnt+=1 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+            except: 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                print('href2 exception') 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                traceback.print_exc() 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+ 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+        if len(datadict['結果標題'])<=0: 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+            print('None') 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+            driver.quit() 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+            sys.exit() 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+        df['搜尋詞']=datadict['搜尋詞'] 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+        df['結果標題']=datadict['結果標題'] 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+        df['結果網址']=datadict['結果網址'] 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+        df['結果名次']=datadict['結果名次'] 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+ 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+        df.to_excel('C:\/Users\/s1301\/Pictures\/Saved Pictures\/鑫富\/'+term+".xls") 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				  
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-    clickelmt=None 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-    cnt=1 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-    datadict={'搜尋詞':[],'結果標題':[],'結果網址':[],'結果名次':[]} 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-     
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-    for elmt in elmts: 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-        try: 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-            href=elmt.get_attribute('href') 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-            if 'taroboba-yuan.com' in href: 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-                clickelmt=elmt 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-                logger.debug('[clickbot_100]['+term+']['+str(cnt)+']') 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				- 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-            print(href) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-            print(elmt.text) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-            datadict['搜尋詞'].append(term) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-            datadict['結果標題'].append(elmt.text) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-            datadict['結果網址'].append(href) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-            datadict['結果名次'].append(str(cnt)) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				- 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-            table.insert({'title':elmt.text,'url':href,'keyword':term,'dt':datetime.datetime.now(),'num':cnt}) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-            cnt+=1 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-        except: 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-            print('href2 exception') 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-            traceback.print_exc() 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-    if clickelmt: 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-        webdriver.ActionChains(driver).move_to_element(clickelmt).perform() 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-        webdriver.ActionChains(driver).move_to_element(clickelmt).click().perform() 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-    if len(datadict['結果標題'])<=0: 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-        print('None') 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				         driver.quit() 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-        sys.exit() 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-    df['搜尋詞']=datadict['搜尋詞'] 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-    df['結果標題']=datadict['結果標題'] 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-    df['結果網址']=datadict['結果網址'] 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-    df['結果名次']=datadict['結果名次'] 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				- 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-    df.to_excel('/Users/zooeytsai/'+fname+".xls") 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				- 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-    driver.quit() 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				- 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+        print('等待') 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+        time.sleep(random.randint(100,120)) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				 process_one() 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				  
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-parser = argparse.ArgumentParser() 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-parser.add_argument('--loop') 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-args = parser.parse_args() 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				- 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-if args.loop: 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				- 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-#    schedule.every(6).minutes.do(process_one) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-    schedule.every(0.4).minutes.do(process_one) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				  
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-    while True: 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-        schedule.run_pending() 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-        time.sleep(1) 
			 |