choozmo
/
kw_tools


			
							123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109
							import random
import sys
import dataset
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.by import By
import traceback
import datetime
import codecs
import time
import urllib
import argparse
import logging
import sys
from logging.handlers import SysLogHandler
import socket
import pandas as pd
import socket
import os
import dataset
import pymysql
pymysql.install_as_MySQLdb()

driver = None


def restart_browser():
    """Launch a fresh headless, incognito Chrome session and return it.

    The new WebDriver is also stored in the module-level ``driver`` global.
    Before returning, the detected browser version is printed, all cookies
    are deleted and the window is resized to 1400x20000.

    Returns:
        The newly created ``webdriver.Chrome`` instance.
    """
    global driver

    spoofed_agent = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/87.0.4280.88 Safari/537.36'

    chrome_options = webdriver.ChromeOptions()
    for flag in ('--headless', "--user-agent=" + spoofed_agent, "--incognito"):
        chrome_options.add_argument(flag)
    # Alternative switches that are currently disabled:
    # chrome_options.add_argument('--remote-debugging-port=9222')
    # chrome_options.add_experimental_option("debuggerAddress", "192.168.192.45:9922")
    # chrome_options.add_argument('--proxy-server=socks5://172.104.93.163:41800')

    # Path to the local chromedriver binary used to drive Chrome.
    chrome_service = Service('/Users/mac/Downloads/123/chromedriver')

    driver = webdriver.Chrome(service=chrome_service, options=chrome_options)
    print('版本', driver.capabilities['browserVersion'])

    driver.delete_all_cookies()
    driver.set_window_size(1400, 20000)
    return driver

def _scrape_keyword(table, term, domain):
    """Search Google for ``term`` and record where ``domain`` ranks.

    A fresh browser is obtained from ``restart_browser``, the Google results
    page (requested with ``num=100``) is loaded, and every result link whose
    URL contains ``domain`` is inserted into ``table`` with its 1-based
    position. The browser is always shut down, even when scraping fails.

    Args:
        table: ``dataset`` table that receives one row per matching result.
        term: Search keyword; it is URL-quoted before being sent to Google.
        domain: Substring looked for in each result URL.

    Returns:
        A ``pandas.DataFrame`` listing every result that was read (keyword,
        title, URL, rank as a string), matching or not.
    """
    # ``import urllib`` alone does not guarantee that the ``parse`` submodule
    # is loaded, so import it explicitly where it is used.
    import urllib.parse

    datadict = {'搜尋詞': [], '結果標題': [], '結果網址': [], '結果名次': []}

    driver = restart_browser()
    try:
        escaped_search_term = urllib.parse.quote(term)
        googleurl = 'https://www.google.com/search?q={}&num={}&hl={}&gl=tw'.format(
            escaped_search_term, 100, 'zh-TW')
        driver.get(googleurl)
        print(driver.current_url)

        elmts = driver.find_elements(By.XPATH, "//div[@class='yuRUbf']//a")
        print('結果數量', len(elmts))

        cnt = 1
        for elmt in elmts:
            try:
                href = elmt.get_attribute('href')
                if href is None:
                    # An anchor without a URL cannot be matched or ranked;
                    # skip it without advancing the rank counter.
                    continue
                # Read the title once: every ``.text`` access is a round-trip
                # to the browser.
                title = elmt.text
                if domain in href:
                    table.insert({'title': title, 'url': href, 'keyword': term,
                                  'dt': datetime.datetime.now(), 'ranking': cnt})
                    print(href)
                    print(title)
                datadict['搜尋詞'].append(term)
                datadict['結果標題'].append(title)
                datadict['結果網址'].append(href)
                datadict['結果名次'].append(str(cnt))
                cnt += 1
            except Exception:
                # Best effort per result: report the failure and keep going.
                # KeyboardInterrupt/SystemExit are deliberately not caught.
                print('href2 exception')
                traceback.print_exc()
    finally:
        # Never leave a headless Chrome process behind, even on errors.
        driver.quit()

    return pd.DataFrame(datadict)


def process_one():
    """Record Google rankings for every keyword returned by the query below.

    Reads ``(term, domain)`` pairs from ``seo.selected_kw``, scrapes Google
    for each term in a fresh browser, stores matching results in the
    ``google_rank`` table, and sleeps 70-90 seconds (random) after each
    keyword. The database connection is closed even if scraping raises.
    """
    # NOTE(review): credentials are embedded in this connection URL; consider
    # loading them from configuration or the environment instead.
    db = dataset.connect('mysql://choozmo:pAssw0rd@db.ptt.cx:3306/seo?charset=utf8mb4')
    try:
        table = db['google_rank']
        # To process another client, change the client name in this query.
        cursor = db.query('select term,domain from seo.selected_kw where client="歌林"')
        lst = [[c['term'], c['domain']] for c in cursor]

        for i in lst:
            print(i)
            _scrape_keyword(table, i[0], i[1])
            print('等待')
            # Random pause between searches.
            time.sleep(random.randint(70, 90))
    finally:
        db.close()
# Script entry point: the ranking check runs as soon as this module is
# executed. There is no ``__main__`` guard, so it also runs on import.
process_one()