choozmo
/
kw_tools


			
							123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120
							import time
from datetime import datetime
import json
from selenium import webdriver
from selenium.webdriver.common.desired_capabilities import DesiredCapabilities
import time
import os
import urllib.parse
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
import codecs
import random
import requests
import dataset
import traceback
import sys

# Domains for which process_query() records a row in the `query_results` table.
target_domain=['bennis.com.tw']
# Brand name stored with each recorded row, looked up by domain.
brands={'bennis.com.tw':'班尼斯'}

# Shared Selenium WebDriver: assigned by run_once(), read by the query helpers.
driver=None

# SECURITY NOTE(review): a LINE Notify access token is committed in source.
# Set the LINE_NOTIFY_TOKEN environment variable to override it; the literal
# below is kept only as a backward-compatible fallback and should be rotated
# and removed from the repository.
_line_notify_token = os.environ.get(
    "LINE_NOTIFY_TOKEN",
    "t35vhZtWNgvDNWHc3DJh0OKll3mcB9GvC8K2EAkBug2",
)

# HTTP headers sent with every LINE Notify request (see send_msg).
headers = {
    "Authorization": "Bearer " + _line_notify_token,
    "Content-Type": "application/x-www-form-urlencoded",
}


def send_msg(kw, timeout=10):
    """Post a "processing keyword" notification to the LINE Notify endpoint.

    Args:
        kw: Keyword text appended to the notification message.
        timeout: Seconds to wait on the HTTP request. Without a timeout,
            ``requests`` waits indefinitely on a stalled connection, which
            would freeze the caller.

    Returns:
        The ``requests.Response`` returned by the POST, so callers may
        inspect the status if they wish (previously the result was dropped).
    """
    params = {"message": "處理關鍵字: "+kw}
    return requests.post(
        "https://notify-api.line.me/api/notify",
        headers=headers,
        params=params,
        timeout=timeout,
    )


def empty_query(q):
    """Load the plain Google results page for *q*, then pause three seconds.

    Navigates the module-level ``driver``; the page content is not inspected
    and nothing is returned.
    """
    search_url = 'https://www.google.com/search?q={}'.format(urllib.parse.quote(q))
    driver.get(search_url)
    time.sleep(3)


def process_query(qs):
    """Search Google for a keyword and click the first result on a domain.

    Loads a 100-result, zh-TW Google results page for the query in the
    module-level ``driver``, scans the result anchors, and clicks the first
    one whose href contains the domain. When the domain is listed in
    ``target_domain``, a summary row is inserted into ``db['query_results']``.

    Args:
        qs: Two-item sequence ``(query, domain)``.

    Raises:
        KeyError: If the domain is in ``target_domain`` but has no entry in
            ``brands``.
    """
    q, domain = qs[0], qs[1]
    googleurl = 'https://www.google.com/search?q={}&num={}&hl={}'.format(urllib.parse.quote(q), 100,'zh-TW')
    print(googleurl)
    driver.get(googleurl)

    # The legacy find_elements_by_xpath helper is deprecated as of
    # Selenium 4.3.0, so the generic find_elements(by, value) form is used.
    elmts=driver.find_elements("xpath","//div[@class='yuRUbf']/a")

    # Number of matching links acted on; at most 1 because the loop stops
    # after the first click.
    domain_in_link = 0

    print(len(elmts))

    for elmt in elmts:
        href=elmt.get_attribute('href')
        txt=elmt.text
        # get_attribute() yields None for an anchor without an href; the
        # substring test below would raise TypeError on it, so skip those.
        # Anchors with 10 or fewer characters of text are ignored as before
        # (presumably to skip links without a real title -- TODO confirm).
        if href is None or len(txt)<=10:
            continue
        if domain in href:
            domain_in_link += 1
            print('clicked....')
            print(href)
            print(txt)
            # Hover first, then hover-and-click, as two separate action chains.
            webdriver.ActionChains(driver).move_to_element(elmt).perform()
            webdriver.ActionChains(driver).move_to_element(elmt).click().perform()
            break

    if domain in target_domain:
        print("Target domain found")
        time_stamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
        db['query_results'].insert({"time_stamp": time_stamp, "brand": brands[domain], "domain": domain, "query": q, "googleurl": googleurl, "element_count": len(elmts), "domain_in_link_count": domain_in_link})

def run_once(q):
    """Run one keyword query in a fresh headless Chrome session.

    Creates a new Chrome WebDriver, stores it in the module-level ``driver``,
    runs ``process_query(q)``, waits three seconds, and shuts the browser
    down.

    Args:
        q: ``(query, domain)`` pair forwarded unchanged to ``process_query``.

    Raises:
        Whatever ``process_query`` or the WebDriver calls raise; the browser
        is still closed in that case.
    """
    global driver
    options = webdriver.ChromeOptions()
    options.add_argument('--headless')
    options.add_argument("--incognito")
    options.add_argument('--no-sandbox')
    options.add_argument('--disable-dev-shm-usage')

    driver = webdriver.Chrome(options=options)
    try:
        driver.delete_all_cookies()
        driver.set_window_size(1400,1000)

        print(q)
        process_query(q)
        time.sleep(3)
    finally:
        # Always close the browser: the caller retries forever, so a skipped
        # quit() on error would leave one orphaned Chrome process per failure.
        driver.quit()

# SECURITY NOTE(review): database credentials are committed in source. Set the
# KW_TOOLS_DB_URL environment variable to override the connection URL; the
# literal is kept only as a backward-compatible fallback and should be rotated
# and removed from the repository.
db = dataset.connect(os.environ.get(
    'KW_TOOLS_DB_URL',
    'mysql://choozmo:pAssw0rd@db.ptt.cx:3306/seo?charset=utf8mb4',
))

# Load every (term, domain) row once; the loop below samples from this list.
cursor=db.query('select term,domain from selected_kw order by rand()')
lst=list(cursor)

# random.choice() raises IndexError on an empty list, which would otherwise
# be caught and retried forever without doing any work.
if not lst:
    raise SystemExit('selected_kw returned no rows; nothing to query')

# Query one randomly chosen keyword at a time, pausing 20-40 seconds between
# attempts. Failures are logged and the loop keeps going.
while True:
    try:
        c=random.choice(lst)
        run_once( (c['term'],c['domain'])   )
    except Exception:
        # `Exception` rather than a bare except, so Ctrl-C (KeyboardInterrupt)
        # and SystemExit still stop the script.
        traceback.print_exc()
    sleepint=random.randint(20,40)
    time.sleep(sleepint)