|
@@ -14,21 +14,20 @@ import logging
|
|
|
import sys
|
|
|
from logging.handlers import SysLogHandler
|
|
|
import socket
|
|
|
+import pandas as pd
|
|
|
|
|
|
_LOG_SERVER = ('hhh.ptt.cx', 514)
|
|
|
logger = logging.getLogger('clickbot_100')
|
|
|
handler1 = SysLogHandler(address=_LOG_SERVER,socktype=socket.SOCK_DGRAM)
|
|
|
logger.addHandler(handler1)
|
|
|
-term='programming running..'
|
|
|
-logger.debug('[clickbot_100]['+term+']')
|
|
|
-
|
|
|
+logger.debug('[clickbot_100][清原]begin')
|
|
|
|
|
|
|
|
|
def restart_browser():
|
|
|
options = webdriver.ChromeOptions()
|
|
|
options.add_argument('--headless')
|
|
|
driver=webdriver.Chrome(options=options)
|
|
|
- driver.set_window_size(1000,3000)
|
|
|
+ driver.set_window_size(950,6000)
|
|
|
return driver
|
|
|
|
|
|
|
|
@@ -36,7 +35,7 @@ def process_one():
|
|
|
db = dataset.connect('mysql://choozmo:pAssw0rd@db.ptt.cx:3306/seo?charset=utf8mb4')
|
|
|
lst=[]
|
|
|
table=db['save_result']
|
|
|
- cursor=db.query('select term from selected_kw where client="清原"')
|
|
|
+ cursor=db.query('select term from selected_kw where client="清原" and term not in (SELECT distinct(keyword) FROM seo.save_result where url like "%taroboba-yuan.com%" and datediff(now(),dt)=0)')
|
|
|
for c in cursor:
|
|
|
lst.append(c['term'])
|
|
|
|
|
@@ -52,10 +51,14 @@ def process_one():
|
|
|
time.sleep(6)
|
|
|
fname=term.replace(' ','_')
|
|
|
driver.save_screenshot('c:/tmp/seo/'+fname+'.png')
|
|
|
+ df=pd.DataFrame()
|
|
|
+
|
|
|
elmts=driver.find_elements_by_xpath("//div[@class='yuRUbf']/a")
|
|
|
|
|
|
clickelmt=None
|
|
|
cnt=1
|
|
|
+ datadict={'搜尋詞':[],'結果標題':[],'結果網址':[],'結果名次':[]}
|
|
|
+
|
|
|
for elmt in elmts:
|
|
|
try:
|
|
|
href=elmt.get_attribute('href')
|
|
@@ -65,6 +68,11 @@ def process_one():
|
|
|
|
|
|
print(href)
|
|
|
print(elmt.text)
|
|
|
+ datadict['搜尋詞'].append(term)
|
|
|
+ datadict['結果標題'].append(elmt.text)
|
|
|
+ datadict['結果網址'].append(href)
|
|
|
+ datadict['結果名次'].append(str(cnt))
|
|
|
+
|
|
|
table.insert({'title':elmt.text,'url':href,'keyword':term,'dt':datetime.datetime.now(),'num':cnt})
|
|
|
cnt+=1
|
|
|
except:
|
|
@@ -73,7 +81,16 @@ def process_one():
|
|
|
if clickelmt:
|
|
|
webdriver.ActionChains(driver).move_to_element(clickelmt).perform()
|
|
|
webdriver.ActionChains(driver).move_to_element(clickelmt).click().perform()
|
|
|
+ if len(datadict['結果標題'])<=0:
|
|
|
+ print('None')
|
|
|
+ driver.quit()
|
|
|
+ sys.exit()
|
|
|
+ df['搜尋詞']=datadict['搜尋詞']
|
|
|
+ df['結果標題']=datadict['結果標題']
|
|
|
+ df['結果網址']=datadict['結果網址']
|
|
|
+ df['結果名次']=datadict['結果名次']
|
|
|
|
|
|
+ df.to_excel('c:/tmp/seo/'+fname+".xls")
|
|
|
|
|
|
driver.quit()
|
|
|
|
|
@@ -85,7 +102,8 @@ args = parser.parse_args()
|
|
|
|
|
|
if args.loop:
|
|
|
|
|
|
- schedule.every(6).minutes.do(process_one)
|
|
|
+# schedule.every(6).minutes.do(process_one)
|
|
|
+ schedule.every(0.4).minutes.do(process_one)
|
|
|
|
|
|
while True:
|
|
|
schedule.run_pending()
|