1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465 |
- import random
- import sys
- import dataset
- from selenium import webdriver
- import traceback
- import datetime
- import codecs
- import time
- import urllib
- import argparse
- import schedule
def restart_browser():
    """Start a new Chrome session and return its WebDriver.

    The browser is launched with default ``ChromeOptions`` and its window is
    resized to 1400x1000 before the driver is handed back to the caller, who
    is responsible for calling ``quit()`` on it.
    """
    chrome_options = webdriver.ChromeOptions()
    browser = webdriver.Chrome(options=chrome_options)
    browser.set_window_size(1400, 1000)
    return browser
def process_one():
    """Search Google for one random keyword and click the client's result.

    Steps:
      1. Load every ``term`` from ``selected_kw`` for the client "清原".
      2. Pick one term at random and open a Google results page for it
         (100 results, ``hl=zh-TW``) in a fresh Chrome session.
      3. Walk the result links and click the first one whose ``href``
         contains ``taroboba-yuan.com``, printing its URL and text.

    The browser is always shut down before returning, even when the search
    or the element lookup fails. If the query yields no keywords the run is
    skipped with a message instead of raising ``IndexError``.

    Returns:
        None
    """
    # Imported explicitly: a bare ``import urllib`` does not guarantee that
    # the ``urllib.parse`` submodule has been loaded.
    from urllib.parse import quote

    # NOTE(review): the connection URL embeds credentials in source; consider
    # moving them to configuration or an environment variable.
    db = dataset.connect('mysql://choozmo:pAssw0rd@db.ptt.cx:3306/seo?charset=utf8mb4')
    rows = db.query('select term from selected_kw where client="清原"')
    terms = [row['term'] for row in rows]
    if not terms:
        # random.choice() would raise an uninformative IndexError here.
        print('no keywords found for client; skipping this run')
        return

    term = random.choice(terms)
    print(term)

    driver = restart_browser()
    try:
        escaped_search_term = quote(term)
        googleurl = 'https://www.google.com/search?q={}&num={}&hl={}'.format(
            escaped_search_term, 100, 'zh-TW')
        print(googleurl)
        driver.get(googleurl)
        # Fixed pause to let the results page finish rendering.
        time.sleep(6)

        # The generic find_elements(by, value) form works on Selenium 3 and 4;
        # the find_elements_by_* helpers were removed in Selenium 4.3.
        elmts = driver.find_elements('xpath', "//div[@class='yuRUbf']/a")
        for elmt in elmts:
            try:
                href = elmt.get_attribute('href')
                # get_attribute() may return None; skip such anchors quietly.
                if href and 'taroboba-yuan.com' in href:
                    # Hover first, then hover-and-click, as two separate
                    # action chains.
                    webdriver.ActionChains(driver).move_to_element(elmt).perform()
                    webdriver.ActionChains(driver).move_to_element(elmt).click().perform()
                    print(href)
                    print(elmt.text)
                    break
            except Exception:
                # Best effort per element: report and try the next result.
                # ``Exception`` (not a bare except) keeps Ctrl-C working.
                print('href2 exception')
                traceback.print_exc()
    finally:
        # Always release the Chrome process, otherwise failed scheduled runs
        # would accumulate orphaned browsers.
        driver.quit()
# Parse the command line before doing any work so that ``--help`` or an
# invalid argument does not launch a browser run first.
parser = argparse.ArgumentParser()
# ``nargs='?'`` with ``const=True`` lets ``--loop`` be passed as a bare switch
# while still accepting the older ``--loop VALUE`` form.
parser.add_argument(
    '--loop',
    nargs='?',
    const=True,
    help='after the initial run, repeat process_one every 5 minutes',
)
args = parser.parse_args()

# One run is always performed immediately, with or without --loop.
process_one()

if args.loop:
    schedule.every(5).minutes.do(process_one)
    # Poll the scheduler once per second; this loop never exits on its own.
    while True:
        schedule.run_pending()
        time.sleep(1)
|