|
@@ -2,14 +2,14 @@ import random
|
|
|
import sys
|
|
|
import dataset
|
|
|
from selenium import webdriver
|
|
|
+from selenium.webdriver.chrome.service import Service
|
|
|
+from selenium.webdriver.common.by import By
|
|
|
import traceback
|
|
|
import datetime
|
|
|
import codecs
|
|
|
import time
|
|
|
import urllib
|
|
|
import argparse
|
|
|
-import schedule
|
|
|
-
|
|
|
import logging
|
|
|
import sys
|
|
|
from logging.handlers import SysLogHandler
|
|
@@ -17,100 +17,87 @@ import socket
|
|
|
import pandas as pd
|
|
|
import socket
|
|
|
import os
|
|
|
-_LOG_SERVER = ('hhh.ptt.cx', 514)
|
|
|
-logger = logging.getLogger('clickbot_100')
|
|
|
-handler1 = SysLogHandler(address=_LOG_SERVER,socktype=socket.SOCK_DGRAM)
|
|
|
-logger.addHandler(handler1)
|
|
|
-#logger.debug('[clickbot_100][清原]begin')
|
|
|
-
|
|
|
-hname=socket.gethostname()
|
|
|
-pid=str(os.getpid())
|
|
|
-logger.fatal('[clickbot_100]['+hname+']['+pid+']begin')
|
|
|
-
|
|
|
+import dataset
|
|
|
+import pymysql
|
|
|
+pymysql.install_as_MySQLdb()
|
|
|
|
|
|
+driver = None
|
|
|
def restart_browser():
|
|
|
+ global driver
|
|
|
+ user_agent = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/87.0.4280.88 Safari/537.36'
|
|
|
+ s = Service('C:\/Users\/s1301\/Downloads\/chromedriver_107\/chromedriver')
|
|
|
options = webdriver.ChromeOptions()
|
|
|
- options.add_argument('--headless')
|
|
|
- driver=webdriver.Chrome(options=options)
|
|
|
- driver.set_window_size(950,6000)
|
|
|
+ options.add_argument('--headless')
|
|
|
+ # options.add_argument('--remote-debugging-port=9222')
|
|
|
+ # options.add_experimental_option("debuggerAddress", "192.168.192.45:9922")
|
|
|
+ options.add_argument("--user-agent=" + user_agent)
|
|
|
+ options.add_argument("--incognito")
|
|
|
+ # options.add_argument('--proxy-server=socks5://172.104.93.163:41800')
|
|
|
+ driver = webdriver.Chrome(
|
|
|
+ options=options, service=s)
|
|
|
+ str1 = driver.capabilities['browserVersion']
|
|
|
+ print('版本', str1)
|
|
|
+ driver.delete_all_cookies()
|
|
|
+ driver.set_window_size(1400, 20000)
|
|
|
return driver
|
|
|
|
|
|
-
|
|
|
def process_one():
|
|
|
db = dataset.connect('mysql://choozmo:pAssw0rd@db.ptt.cx:3306/seo?charset=utf8mb4')
|
|
|
- lst=[]
|
|
|
+ lst=['123']
|
|
|
table=db['google_rank']
|
|
|
- cursor = db.query('select term from seo.selected_kw')
|
|
|
+ # cursor = db.query('select term from seo.selected_kw where client="鑫富"')
|
|
|
+ # cursor = db.query('select term from seo.selected_kw where id between 1902 and 1923')
|
|
|
# cursor=db.query('select term from selected_kw and term not in (SELECT distinct(keyword) FROM ig_tags.save_result where url like "%beastparadise.net%" and datediff(now(),dt)=0)')
|
|
|
- for c in cursor:
|
|
|
- lst.append(c['term'])
|
|
|
-
|
|
|
- term=random.choice(lst)
|
|
|
- print(term)
|
|
|
- logger.debug('[clickbot_100]['+term+']')
|
|
|
- driver=restart_browser()
|
|
|
- escaped_search_term=urllib.parse.quote(term)
|
|
|
- googleurl = 'https://www.google.com/search?q={}&num={}&hl={}'.format(escaped_search_term, 100,'zh-TW')
|
|
|
-
|
|
|
- print(googleurl)
|
|
|
- driver.get(googleurl)
|
|
|
- time.sleep(6)
|
|
|
- fname=term.replace(' ','_')
|
|
|
- # driver.save_screenshot('c:/tmp/seo/'+fname+'.png')
|
|
|
- df=pd.DataFrame()
|
|
|
- # driver.get_screenshot_as_file("/Users/zooeytsai/排名100.png")
|
|
|
- elmts=driver.find_elements_by_xpath("//div[@class='yuRUbf']/a")
|
|
|
+ # for c in cursor:
|
|
|
+ # lst.append(c['term'])
|
|
|
+ db.close()
|
|
|
+ domain = 'vickybrain.com'
|
|
|
+ for term in lst:
|
|
|
+ print(term)
|
|
|
+ driver=restart_browser()
|
|
|
+ escaped_search_term=urllib.parse.quote(term)
|
|
|
+ googleurl = 'https://www.google.com/search?q={}&num={}&hl={}'.format(escaped_search_term, 100,'zh-TW')
|
|
|
+ driver.get(googleurl)
|
|
|
+ time.sleep(60)
|
|
|
+ print(driver.current_url)
|
|
|
+ driver.save_screenshot('C:\/Users\/s1301\/Pictures\/Saved Pictures\/鑫富\/'+term+'.png')
|
|
|
+ df=pd.DataFrame()
|
|
|
+ elmts=driver.find_elements(By.XPATH,"//div[@class='yuRUbf']/a")
|
|
|
+ print('結果數量',len(elmts))
|
|
|
+ cnt=1
|
|
|
+ datadict={'搜尋詞':[],'結果標題':[],'結果網址':[],'結果名次':[]}
|
|
|
+
|
|
|
+ for elmt in elmts:
|
|
|
+ try:
|
|
|
+ href=elmt.get_attribute('href')
|
|
|
+ if domain in href:
|
|
|
+ # table.insert({'title':elmt.text,'url':href,'keyword':term,'dt':datetime.datetime.now(),'ranking':cnt})
|
|
|
+ print(href)
|
|
|
+ print(elmt.text)
|
|
|
+ datadict['搜尋詞'].append(term)
|
|
|
+ datadict['結果標題'].append(elmt.text)
|
|
|
+ datadict['結果網址'].append(href)
|
|
|
+ datadict['結果名次'].append(str(cnt))
|
|
|
+
|
|
|
+ cnt+=1
|
|
|
+ except:
|
|
|
+ print('href2 exception')
|
|
|
+ traceback.print_exc()
|
|
|
+
|
|
|
+ if len(datadict['結果標題'])<=0:
|
|
|
+ print('None')
|
|
|
+ driver.quit()
|
|
|
+ sys.exit()
|
|
|
+ df['搜尋詞']=datadict['搜尋詞']
|
|
|
+ df['結果標題']=datadict['結果標題']
|
|
|
+ df['結果網址']=datadict['結果網址']
|
|
|
+ df['結果名次']=datadict['結果名次']
|
|
|
+
|
|
|
+ df.to_excel('C:\/Users\/s1301\/Pictures\/Saved Pictures\/鑫富\/'+term+".xls")
|
|
|
|
|
|
- clickelmt=None
|
|
|
- cnt=1
|
|
|
- datadict={'搜尋詞':[],'結果標題':[],'結果網址':[],'結果名次':[]}
|
|
|
-
|
|
|
- for elmt in elmts:
|
|
|
- try:
|
|
|
- href=elmt.get_attribute('href')
|
|
|
- if 'taroboba-yuan.com' in href:
|
|
|
- clickelmt=elmt
|
|
|
- logger.debug('[clickbot_100]['+term+']['+str(cnt)+']')
|
|
|
-
|
|
|
- print(href)
|
|
|
- print(elmt.text)
|
|
|
- datadict['搜尋詞'].append(term)
|
|
|
- datadict['結果標題'].append(elmt.text)
|
|
|
- datadict['結果網址'].append(href)
|
|
|
- datadict['結果名次'].append(str(cnt))
|
|
|
-
|
|
|
- table.insert({'title':elmt.text,'url':href,'keyword':term,'dt':datetime.datetime.now(),'num':cnt})
|
|
|
- cnt+=1
|
|
|
- except:
|
|
|
- print('href2 exception')
|
|
|
- traceback.print_exc()
|
|
|
- if clickelmt:
|
|
|
- webdriver.ActionChains(driver).move_to_element(clickelmt).perform()
|
|
|
- webdriver.ActionChains(driver).move_to_element(clickelmt).click().perform()
|
|
|
- if len(datadict['結果標題'])<=0:
|
|
|
- print('None')
|
|
|
driver.quit()
|
|
|
- sys.exit()
|
|
|
- df['搜尋詞']=datadict['搜尋詞']
|
|
|
- df['結果標題']=datadict['結果標題']
|
|
|
- df['結果網址']=datadict['結果網址']
|
|
|
- df['結果名次']=datadict['結果名次']
|
|
|
-
|
|
|
- df.to_excel('/Users/zooeytsai/'+fname+".xls")
|
|
|
-
|
|
|
- driver.quit()
|
|
|
-
|
|
|
+ print('等待')
|
|
|
+ time.sleep(random.randint(100,120))
|
|
|
process_one()
|
|
|
|
|
|
-parser = argparse.ArgumentParser()
|
|
|
-parser.add_argument('--loop')
|
|
|
-args = parser.parse_args()
|
|
|
-
|
|
|
-if args.loop:
|
|
|
-
|
|
|
-# schedule.every(6).minutes.do(process_one)
|
|
|
- schedule.every(0.4).minutes.do(process_one)
|
|
|
|
|
|
- while True:
|
|
|
- schedule.run_pending()
|
|
|
- time.sleep(1)
|