|  | @@ -0,0 +1,108 @@
 | 
	
		
			
				|  |  | +import time
 | 
	
		
			
				|  |  | +import json
 | 
	
		
			
				|  |  | +from selenium import webdriver
 | 
	
		
			
				|  |  | +from selenium.webdriver.common.desired_capabilities import DesiredCapabilities
 | 
	
		
			
				|  |  | +import time
 | 
	
		
			
				|  |  | +import os
 | 
	
		
			
				|  |  | +import urllib.parse
 | 
	
		
			
				|  |  | +from selenium.webdriver.support.ui import WebDriverWait
 | 
	
		
			
				|  |  | +from selenium.webdriver.common.by import By
 | 
	
		
			
				|  |  | +from selenium.webdriver.support import expected_conditions as EC
 | 
	
		
			
				|  |  | +import codecs
 | 
	
		
			
				|  |  | +import random
 | 
	
		
			
				|  |  | +import requests
 | 
	
		
			
				|  |  | +import dataset
 | 
	
		
			
				|  |  | +import time
 | 
	
		
			
				|  |  | +import traceback
 | 
	
		
			
				|  |  | +import sys
 | 
	
		
			
				|  |  | +
 | 
	
		
			
				# Shared Selenium WebDriver handle: run_once() assigns it, the query helpers read it.
				driver = None

				# HTTP headers for the LINE Notify request issued by send_msg().
				# SECURITY: a bearer token is committed in this file. Supply it through the
				# LINE_NOTIFY_TOKEN environment variable instead; the literal below remains only
				# as a fallback so existing deployments keep working, and should be rotated
				# and removed.
				headers = {
				    "Authorization": "Bearer " + os.environ.get(
				        "LINE_NOTIFY_TOKEN", "t35vhZtWNgvDNWHc3DJh0OKll3mcB9GvC8K2EAkBug2"
				    ),
				    "Content-Type": "application/x-www-form-urlencoded",
				}
 | 
	
		
			
				|  |  | +
 | 
	
		
			
				|  |  | +
 | 
	
		
			
				|  |  | +
 | 
	
		
			
				def send_msg(kw):
				    """Send a LINE Notify message announcing the keyword being processed.

				    Args:
				        kw: Keyword string appended to the fixed message prefix.

				    Raises:
				        requests.exceptions.RequestException: On connection failure or when
				            the server does not answer within the timeout.

				    The request is authorized with the module-level ``headers``. The HTTP
				    status of the response is not inspected.
				    """
				    params = {"message": "處理關鍵字: " + kw}
				    # requests has no default timeout; without one a stalled connection would
				    # block the caller indefinitely.
				    requests.post(
				        "https://notify-api.line.me/api/notify",
				        headers=headers,
				        params=params,
				        timeout=10,
				    )
 | 
	
		
			
				|  |  | +
 | 
	
		
			
				|  |  | +
 | 
	
		
			
				def empty_query(q):
				    """Open the plain Google results page for *q* in the shared driver, then pause.

				    Args:
				        q: Search text; it is percent-encoded before being placed in the URL.

				    Relies on the module-level ``driver`` already holding a live WebDriver.
				    """
				    encoded_query = urllib.parse.quote(q)
				    driver.get('https://www.google.com/search?q=' + encoded_query)
				    # Fixed three-second pause after navigation.
				    time.sleep(3)
 | 
	
		
			
				|  |  | +
 | 
	
		
			
				|  |  | +
 | 
	
		
			
				def process_query(qs):
				    """Search Google for a keyword and click the first result on a target domain.

				    Args:
				        qs: Two-item sequence ``(query, domain)``. ``query`` is the search
				            text; ``domain`` is a substring looked for in each result link's
				            ``href``.

				    Uses the module-level ``driver``, which must already be a live WebDriver.
				    The search URL passes ``num=100`` and ``hl=zh-TW``. After a six-second
				    pause the result anchors are scanned in page order; the first one whose
				    visible text is longer than 10 characters and whose ``href`` contains
				    ``domain`` is hovered and then clicked. Nothing is clicked when no anchor
				    matches.
				    """
				    q, domain = qs[0], qs[1]
				    googleurl = 'https://www.google.com/search?q={}&num={}&hl={}'.format(
				        urllib.parse.quote(q), 100, 'zh-TW')
				    print(googleurl)
				    driver.get(googleurl)
				    time.sleep(6)

				    # find_elements_by_xpath() was removed in Selenium 4.3; the By-based call
				    # works on both Selenium 3 and 4.
				    # NOTE(review): 'yuRUbf' is presumably the wrapper class Google puts around
				    # each organic result link; it is not a stable identifier -- confirm.
				    elmts = driver.find_elements(By.XPATH, "//div[@class='yuRUbf']/a")
				    print(len(elmts))

				    for elmt in elmts:
				        href = elmt.get_attribute('href')
				        txt = elmt.text
				        # get_attribute() returns None for a missing attribute, which would make
				        # the substring test below raise TypeError.
				        if href is None or len(txt) <= 10 or domain not in href:
				            continue

				        print('clicked....')
				        print(href)
				        print(txt)
				        # Hover first, then move-and-click, as two separate action chains.
				        webdriver.ActionChains(driver).move_to_element(elmt).perform()
				        webdriver.ActionChains(driver).move_to_element(elmt).click().perform()
				        break
 | 
	
		
			
				|  |  | +
 | 
	
		
			
				|  |  | +
 | 
	
		
			
				def run_once(q):
				    """Run one search-and-click pass in a fresh headless, incognito Chrome.

				    Args:
				        q: ``(query, domain)`` pair, forwarded unchanged to ``process_query``.

				    Rebinds the module-level ``driver`` to the new browser session. The
				    browser is shut down before returning, including when the pass raises;
				    the exception itself still propagates to the caller.
				    """
				    global driver
				    options = webdriver.ChromeOptions()
				    options.add_argument('--headless')
				    options.add_argument("--incognito")

				    driver = webdriver.Chrome(options=options)
				    try:
				        driver.delete_all_cookies()
				        driver.set_window_size(1400, 1000)

				        print(q)
				        process_query(q)
				        # Stay on the page for three seconds after the click attempt.
				        time.sleep(3)
				    finally:
				        # quit() must run even on failure: the caller retries in an endless
				        # loop, so a skipped quit() would leave one Chrome process per error.
				        driver.quit()
 | 
	
		
			
				|  |  | +
 | 
	
		
			
				|  |  | +
 | 
	
		
			
				# Load the click jobs for the "hhh-faq" category. Each row exposes the keys
				# 'term' (search text, aliased from kw) and 'domain'.
				# SECURITY: database credentials are committed in this file. Supply the
				# connection URL through the SEO_DB_URL environment variable instead; the
				# literal below remains only as a fallback and should be rotated and removed.
				db = dataset.connect(os.environ.get(
				    'SEO_DB_URL',
				    'mysql://choozmo:pAssw0rd@db.ptt.cx:3306/seo?charset=utf8mb4'))

				cursor = db.query('SELECT kw as term,domain FROM seo.seo_clickjobs where category="hhh-faq"  order by rand()')
				lst = list(cursor)

				if not lst:
				    # random.choice() raises IndexError on an empty list, so the loop below
				    # would only print tracebacks forever; stop with a clear message instead.
				    sys.exit('no click jobs found for category "hhh-faq"')

				# Run forever: pick a random job, perform one pass, then sleep for a random
				# 380-520 seconds before the next pass.
				while True:
				    try:
				        c = random.choice(lst)
				        run_once((c['term'], c['domain']))
				    except Exception:
				        # Catch Exception rather than everything so that KeyboardInterrupt and
				        # SystemExit can still terminate the script.
				        traceback.print_exc()
				    sleepint = random.randint(380, 520)
				    time.sleep(sleepint)
 | 
	
		
			
				|  |  | +
 |