# choozmo / kw_tools
							from selenium import webdriver
from selenium.webdriver.common.desired_capabilities import DesiredCapabilities
import time
import os
import datetime
import urllib.parse
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
import codecs
import random
from bs4 import BeautifulSoup
import requests
import time
import rpyc
import sys
import docker
import  googlesearch
import codecs
import sys
import time
import dataset
import os


# Connection to the MySQL database that holds both the keyword terms and the
# recorded SERP rankings.
# NOTE(review): the credentials are embedded in the URL below; consider moving
# them to configuration kept outside the source tree.
db = dataset.connect('mysql://choozmo:pAssw0rd@db.ptt.cx:3306/hhh?charset=utf8mb4')

# Keyword lookup table. It is left empty: the queries that used to populate
# it are disabled.
kwlst = dict()

# Table that receives one row per processed keyword.
table = db['hhh_contentgap_serp']
curdir = os.path.realpath('.')

# Terms to process: every entry of contentgap_terms that does not already
# have a non-null ranking stored with today's date. (An earlier version read
# the terms from a contentgap.txt file instead.)
cursor = db.query('select term from hhh.contentgap_terms where term not in (SELECT kw FROM hhh.hhh_contentgap_serp where datediff(now(),dt) =0 and ranking is not null )')
lst = [row['term'] for row in cursor]


# HTTP headers sent with the LINE Notify request: bearer-token authorization
# plus a form-encoded content type.
headers = {}
headers["Authorization"] = "Bearer " + "t35vhZtWNgvDNWHc3DJh0OKll3mcB9GvC8K2EAkBug2"
headers["Content-Type"] = "application/x-www-form-urlencoded"

def send_msg(kw):
    """Post a notification announcing the keyword that is being processed.

    The message is sent to the LINE Notify endpoint using the module-level
    ``headers``.

    Args:
        kw: Keyword text appended to the fixed message prefix.

    Raises:
        requests.exceptions.RequestException: If the request fails or does
            not complete within the timeout.
    """
    params = {"message": "處理關鍵字: "+kw}
    # Without a timeout, requests waits indefinitely on an unresponsive
    # server, which would stall the whole run.
    requests.post(
        "https://notify-api.line.me/api/notify",
        headers=headers,
        params=params,
        timeout=10,
    )


def empty_query(q):
    """Open the Google results page for *q* and pause for three seconds.

    Uses the module-level ``driver``. Nothing is read from the page and
    nothing is returned.
    """
    search_url = 'https://www.google.com/search?q={}'.format(urllib.parse.quote(q))
    driver.get(search_url)
    time.sleep(3)


# XPath used to pick the result links out of the Google results page.
_RESULT_LINK_XPATH = "//div[@class='yuRUbf']/a"


def _scan_result_links(pat, idx, error_label):
    """Scan the result links on the page currently loaded in ``driver``.

    Args:
        pat: Substring to look for in each link's href.
        idx: Position number assigned to the first link on this page.
        error_label: Text printed when a link cannot be processed.

    Returns:
        A ``(match, next_idx)`` tuple. ``match`` is the position of the first
        link whose href contains *pat*, or ``None`` when no link matches.
        ``next_idx`` is the position at which numbering should continue.
    """
    for elmt in driver.find_elements(By.XPATH, _RESULT_LINK_XPATH):
        try:
            href = elmt.get_attribute('href')
            print(str(idx)+': '+href)
            if pat in href:
                return idx, idx
            idx += 1
        except Exception:
            # A link whose href cannot be read (or is None) is reported and
            # skipped without consuming a position, so the numbering stays
            # the same as before this helper was extracted.
            print(error_label)
    return None, idx


def process_query(qs, number_results=10, language_code='en', pat='hhh.com.tw'):
    """Return the position of the first Google result whose URL contains *pat*.

    Loads the search page for *qs* in the module-level ``driver`` and scans
    its result links. If nothing matches, clicks the "next page" link, waits
    four seconds and scans that page as well, continuing the numbering.

    Args:
        qs: Search query; it is URL-quoted before being sent.
        number_results: Results requested per page (the ``num`` URL parameter
            is set to this value plus one).
        language_code: Value of the ``hl`` URL parameter.
        pat: Substring to look for in each result's href.

    Returns:
        The zero-based position of the first matching link, or ``None`` when
        no link matches on either page or the next-page link cannot be used.
    """
    escaped_search_term = urllib.parse.quote(qs)
    googleurl = 'https://www.google.com/search?q={}&num={}&hl={}'.format(
        escaped_search_term, number_results+1, language_code)
    driver.get(googleurl)

    found, idx = _scan_result_links(pat, 0, 'href exception')
    if found is not None:
        return found

    try:
        # find_element(By.XPATH, ...) replaces find_element_by_xpath, which
        # was removed in Selenium 4.3; this form also works on Selenium 3.
        elmt = driver.find_element(By.XPATH, "//a[@id='pnnext']")
        webdriver.ActionChains(driver).move_to_element(elmt).perform()
        webdriver.ActionChains(driver).move_to_element(elmt).click().perform()
    except Exception:
        print('pnnext exception')
        return None

    # Pause before reading the second page of results.
    time.sleep(4)
    found, idx = _scan_result_links(pat, idx, 'href2 exception')
    return found


# Module-level state: a results list (not referenced by the visible code)
# and the shared WebDriver handle, which is None until a session is created.
result = list()
driver = None

def restart_browser():
    """Restart the browser container and open a new remote Chrome session.

    Restarts the Docker container named ``p4444``, sleeps ten seconds, then
    connects to the WebDriver hub at ``http://127.0.0.1:4444/wd/hub``.

    Returns:
        The new ``webdriver.Remote`` instance, with its window resized to
        1400x1000.
    """
    os.system('docker container restart p4444')
    # Presumably gives the container time to come back up before connecting.
    time.sleep(10)

    options = webdriver.ChromeOptions()
    # Pass the options object directly: the ``desired_capabilities`` keyword
    # was removed from the Remote constructor in Selenium 4.10, while
    # ``options`` is accepted by late 3.x releases and all 4.x releases.
    session = webdriver.Remote(
        command_executor='http://127.0.0.1:4444/wd/hub',
        options=options,
    )
    session.set_window_size(1400, 1000)
    return session


# Main loop: for every pending keyword, start from a fresh browser session,
# look up the ranking of hhh.com.tw and store the result.
for keyword in lst:
    driver = restart_browser()

    ranking = process_query(keyword, number_results=100, language_code='zh-TW', pat='hhh.com.tw')
    print({'kw': keyword, 'ranking': ranking})

    if ranking is None:
        # No ranking found: dump the page for inspection and stop the run if
        # it contains Google's unusual-traffic notice.
        print(driver.page_source)
        if '我們的系統偵測到您的電腦網路送出的流量有異常情況' in driver.page_source:
            print('baned.....')
            sys.exit()

    # A missing ranking is stored as NULL together with the timestamp.
    table.insert({'kw': keyword, 'ranking': ranking, 'dt': datetime.datetime.now()})
    db.commit()