Jared hai 3 anos
pai
achega
06d6edfdcf
Modificouse 1 ficheiro con 24 adicións e 6 borrados
  1. 24 6
      SEO/clickbot_100.py

+ 24 - 6
SEO/clickbot_100.py

@@ -14,21 +14,20 @@ import logging
 import sys
 from logging.handlers import SysLogHandler
 import socket
+import pandas as pd
 
 _LOG_SERVER = ('hhh.ptt.cx', 514)
 logger = logging.getLogger('clickbot_100')
 handler1 = SysLogHandler(address=_LOG_SERVER,socktype=socket.SOCK_DGRAM)
 logger.addHandler(handler1)
-term='programming running..'
-logger.debug('[clickbot_100]['+term+']')
-
+logger.debug('[clickbot_100][清原]begin')
 
 
 def restart_browser():
     options = webdriver.ChromeOptions()
     options.add_argument('--headless') 
     driver=webdriver.Chrome(options=options)
-    driver.set_window_size(1000,3000)
+    driver.set_window_size(950,6000)
     return driver
 
 
@@ -36,7 +35,7 @@ def process_one():
     db = dataset.connect('mysql://choozmo:pAssw0rd@db.ptt.cx:3306/seo?charset=utf8mb4')
     lst=[]
     table=db['save_result']
-    cursor=db.query('select term from selected_kw where client="清原"')
+    cursor=db.query('select term from selected_kw where client="清原" and term not in (SELECT distinct(keyword) FROM seo.save_result where url like "%taroboba-yuan.com%" and datediff(now(),dt)=0)')
     for c in cursor:
         lst.append(c['term'])
 
@@ -52,10 +51,14 @@ def process_one():
     time.sleep(6)
     fname=term.replace(' ','_')
     driver.save_screenshot('c:/tmp/seo/'+fname+'.png')
+    df=pd.DataFrame()
+
     elmts=driver.find_elements_by_xpath("//div[@class='yuRUbf']/a")
 
     clickelmt=None
     cnt=1
+    datadict={'搜尋詞':[],'結果標題':[],'結果網址':[],'結果名次':[]}
+    
     for elmt in elmts:
         try:
             href=elmt.get_attribute('href')
@@ -65,6 +68,11 @@ def process_one():
 
             print(href)
             print(elmt.text)
+            datadict['搜尋詞'].append(term)
+            datadict['結果標題'].append(elmt.text)
+            datadict['結果網址'].append(href)
+            datadict['結果名次'].append(str(cnt))
+
             table.insert({'title':elmt.text,'url':href,'keyword':term,'dt':datetime.datetime.now(),'num':cnt})
             cnt+=1
         except:
@@ -73,7 +81,16 @@ def process_one():
     if clickelmt:
         webdriver.ActionChains(driver).move_to_element(clickelmt).perform()
         webdriver.ActionChains(driver).move_to_element(clickelmt).click().perform()
+    if len(datadict['結果標題'])<=0:
+        print('None')
+        driver.quit()
+        sys.exit()
+    df['搜尋詞']=datadict['搜尋詞']
+    df['結果標題']=datadict['結果標題']
+    df['結果網址']=datadict['結果網址']
+    df['結果名次']=datadict['結果名次']
 
+    df.to_excel('c:/tmp/seo/'+fname+".xls")
 
     driver.quit()
 
@@ -85,7 +102,8 @@ args = parser.parse_args()
 
 if args.loop:
 
-    schedule.every(6).minutes.do(process_one)
+#    schedule.every(6).minutes.do(process_one)
+    schedule.every(0.4).minutes.do(process_one)
 
     while True:
         schedule.run_pending()