|
@@ -0,0 +1,65 @@
|
|
|
+import random
|
|
|
+import sys
|
|
|
+import dataset
|
|
|
+from selenium import webdriver
|
|
|
+import traceback
|
|
|
+import datetime
|
|
|
+import codecs
|
|
|
+import time
|
|
|
+import urllib
|
|
|
+import argparse
|
|
|
+import schedule
|
|
|
+
|
|
|
def restart_browser():
    """Start a new Chrome session and return its WebDriver.

    The session is created with default ``ChromeOptions`` and the window is
    resized to 1400x1000 before the driver is returned to the caller.
    """
    browser = webdriver.Chrome(options=webdriver.ChromeOptions())
    browser.set_window_size(1400, 1000)
    return browser
|
|
|
+
|
|
|
+
|
|
|
def process_one():
    """Search Google for one random keyword and click the client's result.

    Steps:
      1. Read every ``term`` from ``selected_kw`` for client "清原".
      2. Pick one term at random and open the Google results page for it
         (``num=100``, ``hl=zh-TW``).
      3. Click the first result link whose ``href`` contains
         ``taroboba-yuan.com`` and print its URL and text.

    If no terms are found, a message is printed and nothing else happens.
    The browser is always closed before returning, even when the page load
    or the element lookup raises. Returns ``None``.
    """
    # Imported locally so the ``parse`` submodule is guaranteed to be loaded;
    # a bare ``import urllib`` does not import ``urllib.parse`` by itself.
    from urllib.parse import quote

    # NOTE(review): the database credentials are hard-coded in this URL;
    # consider moving them to configuration or an environment variable.
    db = dataset.connect('mysql://choozmo:pAssw0rd@db.ptt.cx:3306/seo?charset=utf8mb4')
    terms = [
        row['term']
        for row in db.query('select term from selected_kw where client="清原"')
    ]
    if not terms:
        # random.choice() raises IndexError on an empty sequence.
        print('no terms found in selected_kw')
        return

    term = random.choice(terms)
    print(term)

    driver = restart_browser()
    try:
        googleurl = 'https://www.google.com/search?q={}&num={}&hl={}'.format(
            quote(term), 100, 'zh-TW')
        print(googleurl)
        driver.get(googleurl)
        # Fixed pause before scraping; presumably lets the results page
        # finish rendering.
        time.sleep(6)

        # ``find_elements_by_xpath`` was removed in Selenium 4; the generic
        # ``find_elements(by, value)`` form works on both Selenium 3 and 4.
        # The string 'xpath' is the value of ``By.XPATH``.
        elmts = driver.find_elements('xpath', "//div[@class='yuRUbf']/a")

        for elmt in elmts:
            try:
                href = elmt.get_attribute('href')
                # get_attribute() may return None when the attribute is absent.
                if href and 'taroboba-yuan.com' in href:
                    # Read the text before clicking: once the click navigates
                    # away, the element reference can become stale.
                    text = elmt.text
                    webdriver.ActionChains(driver).move_to_element(elmt).perform()
                    webdriver.ActionChains(driver).move_to_element(elmt).click().perform()
                    print(href)
                    print(text)
                    break
            except Exception:
                # Best effort per element: report the failure and try the
                # next result rather than aborting the whole run.
                print('href2 exception')
                traceback.print_exc()
    finally:
        # Always release the browser, even if loading or scraping failed.
        driver.quit()
|
|
|
+
|
|
|
def main():
    """Run ``process_one`` once, then optionally repeat it every 5 minutes.

    Command-line arguments are parsed before any work is done, so ``--help``
    or an invalid argument exits without touching the database or starting a
    browser. When ``--loop`` is given (with or without a value, e.g.
    ``--loop`` or ``--loop 1``) and its value is truthy, ``process_one`` is
    scheduled every 5 minutes and this function never returns.
    """
    parser = argparse.ArgumentParser()
    # ``nargs='?'`` keeps the old ``--loop VALUE`` form working while also
    # accepting a bare ``--loop`` flag.
    parser.add_argument(
        '--loop', nargs='?', const=True,
        help='after the first run, repeat every 5 minutes')
    args = parser.parse_args()

    process_one()

    if args.loop:
        schedule.every(5).minutes.do(process_one)
        while True:
            schedule.run_pending()
            # Poll once per second for due jobs.
            time.sleep(1)


if __name__ == '__main__':
    main()
|