"""Record the Google result position of a target domain for tracked terms.

For every term in ``hhh.content_top_terms`` that has no ranked row in
``hhh.hhh_top_serp`` dated today, the script restarts a Docker container
(presumably the Selenium hub -- it is simply the first one Docker lists),
searches Google for the term, and inserts the zero-based position of the
first result whose URL contains the target pattern. ``None`` is stored
when the pattern is not found.
"""
import codecs
import datetime
import os
import random
import sys
import time
import urllib.parse

import dataset
import docker
import googlesearch
import requests
import rpyc
from bs4 import BeautifulSoup
from selenium import webdriver
from selenium.common.exceptions import WebDriverException
from selenium.webdriver.common.by import By
from selenium.webdriver.common.desired_capabilities import DesiredCapabilities
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait

# NOTE(review): the database password is hard-coded in this URL. Move it to
# configuration / an environment variable and rotate the credential.
db = dataset.connect('mysql://choozmo:pAssw0rd@db.ptt.cx:3306/hhh?charset=utf8mb4')

# Not read anywhere in the visible code; kept so the module-level names
# still exist for anything that may rely on them.
kwlst = {}
table = db['hhh_top_serp']
curdir = os.path.realpath('.')

# Terms that do not yet have a ranked row dated today.
cursor = db.query('select term from hhh.content_top_terms where term not in (SELECT kw FROM hhh.hhh_top_serp where datediff(now(),dt) =0 and ranking is not null )')
lst = [row['term'] for row in cursor]

# NOTE(review): the LINE Notify bearer token is hard-coded. Move it to
# configuration / an environment variable and rotate the credential.
headers = {
    "Authorization": "Bearer " + "t35vhZtWNgvDNWHc3DJh0OKll3mcB9GvC8K2EAkBug2",
    "Content-Type": "application/x-www-form-urlencoded",
}

# XPath of the anchor inside each organic-result container, and of the
# "next page" link, as used by the Google result page this script targets.
RESULT_LINK_XPATH = "//div[@class='yuRUbf']/a"
NEXT_PAGE_XPATH = "//a[@id='pnnext']"


def send_msg(kw):
    """Post a "processing keyword" notification for *kw* to LINE Notify."""
    params = {"message": "處理關鍵字: "+kw}
    requests.post("https://notify-api.line.me/api/notify", headers=headers, params=params)


def empty_query(q):
    """Load the plain Google search page for *q* and wait three seconds.

    Uses the module-level ``driver``.
    """
    googleurl = 'https://www.google.com/search?q=' + urllib.parse.quote(q)
    driver.get(googleurl)
    time.sleep(3)


def _scan_result_links(pat, start_idx, error_label):
    """Look for *pat* among the result links of the page currently loaded.

    Links are numbered from *start_idx*. A link whose ``href`` cannot be
    read (WebDriver error or missing attribute) prints *error_label* and is
    skipped without consuming a position.

    Returns:
        ``(match_idx, next_idx)``: ``match_idx`` is the position of the first
        link containing *pat*, or ``None``; ``next_idx`` is the position the
        next page should continue counting from.
    """
    idx = start_idx
    for link in driver.find_elements(By.XPATH, RESULT_LINK_XPATH):
        try:
            href = link.get_attribute('href')
        except WebDriverException:
            print(error_label)
            continue
        if href is None:
            print(error_label)
            continue
        print(str(idx) + ': ' + href)
        if pat in href:
            return idx, idx
        idx += 1
    return None, idx


def process_query(qs, number_results=10, language_code='en', pat='hhh.com.tw'):
    """Return the zero-based Google position of the first URL containing *pat*.

    Searches for *qs* with the module-level ``driver``, scans the first
    result page and, if there is no match, clicks "next" and scans the
    second page, continuing the numbering.

    Args:
        qs: Search query; URL-quoted before use.
        number_results: Requested page size; ``number_results + 1`` is sent
            as the ``num`` URL parameter.
        language_code: Value for the ``hl`` URL parameter.
        pat: Substring to look for in each result URL.

    Returns:
        The position of the first matching result, or ``None`` when nothing
        matches on the scanned pages or the next-page link cannot be used.
    """
    escaped_search_term = urllib.parse.quote(qs)
    googleurl = 'https://www.google.com/search?q={}&num={}&hl={}'.format(
        escaped_search_term, number_results + 1, language_code)
    driver.get(googleurl)

    found, next_idx = _scan_result_links(pat, 0, 'href exception')
    if found is not None:
        return found

    try:
        next_link = driver.find_element(By.XPATH, NEXT_PAGE_XPATH)
        # Hover first, then hover-and-click, exactly as before.
        webdriver.ActionChains(driver).move_to_element(next_link).perform()
        webdriver.ActionChains(driver).move_to_element(next_link).click().perform()
    except WebDriverException:
        print('pnnext exception')
        return None

    # Give the second page time to load before scanning it.
    time.sleep(4)
    found, _ = _scan_result_links(pat, next_idx, 'href2 exception')
    return found


result = []
driver = None


def restart_browser():
    """Restart the first listed Docker container and open a new Chrome session.

    Returns:
        A ``webdriver.Remote`` connected to the hub at 127.0.0.1:4444 with a
        1400x1000 window.

    Raises:
        RuntimeError: If Docker lists no containers to restart.
    """
    client = docker.from_env()
    containers = client.containers.list()
    print(containers)
    if not containers:
        raise RuntimeError('no running Docker container to restart')
    containers[0].restart()
    # Wait for the restarted container to accept connections again.
    time.sleep(10)
    browser = webdriver.Remote(
        command_executor='http://127.0.0.1:4444/wd/hub',
        desired_capabilities=DesiredCapabilities.CHROME)
    browser.set_window_size(1400, 1000)
    return browser


for kw in lst:
    # A fresh browser per keyword.
    driver = restart_browser()
    idx = process_query(kw, number_results=100, language_code='zh-TW', pat='hhh.com.tw')
    if idx is None:
        page_source = driver.page_source
        print(page_source)
        # Google's "our systems have detected unusual traffic from your
        # computer network" notice: stop before recording anything.
        if '我們的系統偵測到您的電腦網路送出的流量有異常情況' in page_source:
            print('baned.....')
            sys.exit()
    table.insert({'kw': kw, 'ranking': idx, 'dt': datetime.datetime.now()})
    print({'kw': kw, 'ranking': idx})
    db.commit()