فهرست منبع

Merge branch 'master' of http://git.choozmo.com:3000/choozmo/kw_tools

Jared 2 سال پیش
والد
کامیت
083befea50
7 فایل‌های تغییر یافته به همراه 640 افزوده شده و 0 حذف شده
  1. 141 0
      SEO/general_gsrack.py
  2. 38 0
      SEO/month_kw_rank.py
  3. 148 0
      SEO/ranking_day.py
  4. 129 0
      SEO/ranking_day2.py
  5. 64 0
      SEO/run_gsrack.py
  6. 59 0
      SEO/run_ranking_day.py
  7. 61 0
      monitor/monitor_chrome.py

+ 141 - 0
SEO/general_gsrack.py

@@ -0,0 +1,141 @@
+import time
+import json
+from selenium import webdriver
+from selenium.webdriver.common.desired_capabilities import DesiredCapabilities
+import time
+import os
+import urllib.parse
+from selenium.webdriver.support.ui import WebDriverWait
+from selenium.webdriver.common.by import By
+from selenium.webdriver.chrome.service import Service
+from selenium.webdriver.support import expected_conditions as EC
+import codecs
+import random
+import requests
+import datetime
+import dataset
+import time
+import traceback
+import sys
+import fire
+import random
+import pymysql
+
+# Register PyMySQL under the MySQLdb module name.
+pymysql.install_as_MySQLdb()
+
+# NOTE(review): the database credentials are hard-coded in this URL; consider
+# loading them from the environment or a config file instead.
+db = dataset.connect('mysql://choozmo:pAssw0rd@db.ptt.cx:3306/seo?charset=utf8mb4')
+# Table that process_query() inserts ranking hits into.
+table = db['general_log']
+
+# Shared Selenium driver; assigned in run_once() and read by the query helpers.
+driver = None
+
+
+def rua():
+    """Return a randomly chosen desktop browser User-Agent string.
+
+    Every pool entry is one complete User-Agent value; each entry must end
+    with a comma so adjacent literals are not silently concatenated.
+    """
+    pool = [
+        "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:73.0) Gecko/20100101 Firefox/73.0",
+        "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:76.0) Gecko/20100101 Firefox/76.0",
+        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/87.0.4280.88 Safari/537.36",
+        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/60.0.3112.101 Safari/537.36",
+        "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/78.0.3904.108 Safari/537.36",
+        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/83.0.4103.97 Safari/537.36",
+        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/81.0.4044.138 Safari/537.36 OPR/68.0.3618.125",
+    ]
+    return random.choice(pool)
+
+
+def empty_query(q):
+    """Load the plain Google results page for *q*, then wait three seconds."""
+    search_url = 'https://www.google.com/search?q={}'.format(urllib.parse.quote(q))
+    driver.get(search_url)
+    time.sleep(3)
+
+
+def process_query(qs):
+    """Search Google for a keyword and click the first hit on the target domain.
+
+    Args:
+        qs: Sequence whose first item is the keyword and whose second item is
+            the domain substring to look for in the result URLs.
+
+    The first result whose title is longer than 10 characters and whose URL
+    contains the domain is inserted into ``table`` with its 1-based position
+    among all results, and is then clicked. Nothing is inserted when no
+    result matches.
+    """
+    global driver
+    q = qs[0]
+    domain = qs[1]
+    googleurl = 'https://www.google.com/search?q={}&num={}&hl={}'.format(urllib.parse.quote(q), 100, 'zh-TW')
+    print(googleurl)
+    driver.get(googleurl)
+    time.sleep(6)
+
+    # Retry a bounded number of times: the previous `while True` with a bare
+    # `except: pass` could spin forever and also swallowed Ctrl-C.
+    elmts = []
+    for _attempt in range(5):
+        try:
+            elmts = driver.find_elements(By.XPATH, "//div[@class='yuRUbf']/a")
+            print('尋找')
+            break
+        except Exception:
+            traceback.print_exc()
+            time.sleep(1)
+
+    print('搜尋結果數量', len(elmts))
+
+    # The position counts every result, including ones with short titles.
+    for idx, elmt in enumerate(elmts, start=1):
+        # get_attribute() may return None; treat that as "no URL".
+        href = elmt.get_attribute('href') or ''
+        txt = elmt.text
+        if len(txt) > 10 and domain in href:
+            print('clicked....')
+            print(href)
+            print(txt)
+            print("ranking", idx)
+            table.insert({'kw': q, 'domain': domain, 'ranking': idx, 'title': txt, 'url': href,
+                          'dt': datetime.datetime.now(), 'num': 1})
+            webdriver.ActionChains(driver).move_to_element(elmt).perform()
+            webdriver.ActionChains(driver).move_to_element(elmt).click().perform()
+            break
+
+
+def run_once(q):
+    """Attach to a Chrome debugging port and run one keyword query.
+
+    Args:
+        q: Tuple ``(kw, domain, port)``. ``port`` is interpolated into the
+            ``debuggerAddress`` option as ``127.0.0.1:<port>``; the whole tuple
+            is passed on to process_query().
+    """
+    global driver
+    s = Service('/root/driver/chromedriver')
+    user_agent = rua()
+    options = webdriver.ChromeOptions()
+    options.add_argument('--headless')
+    options.add_argument('--remote-debugging-port=9222')
+    # Must be an f-string: without the prefix the literal text "{q[2]}" was
+    # sent as the port instead of the caller's value.
+    options.add_experimental_option("debuggerAddress", f"127.0.0.1:{q[2]}")
+    options.add_argument("--user-agent=" + user_agent)
+    options.add_argument("--incognito")
+
+    driver = webdriver.Chrome(options=options, service=s)
+    try:
+        driver.delete_all_cookies()
+        driver.set_window_size(1400, 1000)
+
+        print('到此')
+        process_query(q)
+        time.sleep(3)
+    finally:
+        # Always end the session, even when the query raises.
+        driver.quit()
+
+
+# for c in lst:
+# while True:
+#    try:
+#        c=random.choice(lst)
+#    except:
+#        traceback.print_exc()
+#    sleepint=random.randint(320,520)
+#    time.sleep(sleepint)
+
+class JParams(object):
+    """Methods handed to ``fire`` as command-line sub-commands."""
+
+    def get(self, kw, domain, port):
+        """Print the keyword and domain, then run a single query for them."""
+        for value in (kw, domain):
+            print(value)
+        query = (kw, domain, port)
+        run_once(query)
+
+
+if __name__ == '__main__':
+    # Expose the JParams methods as command-line sub-commands.
+    # The duplicate `get` function that used to follow this call was never
+    # reachable by fire (merge residue) and has been removed.
+    fire.Fire(JParams)
+

+ 38 - 0
SEO/month_kw_rank.py

@@ -0,0 +1,38 @@
+import pandas as pd
+import time
+
+def day_col(row):
+    """Return the first ten characters of the row's ``dt`` value."""
+    return row['dt'][:10]
+
+
+# Names substituted into the input and output CSV paths below.
+custom_name = ['毛怪','火柴人','清原','仁本']
+for name in custom_name:
+    df = pd.read_csv(f"/Users/zooeytsai/Documents/{name}5月關鍵字排名進前十名.csv")
+    # dt2 holds the first ten characters of dt (presumably the YYYY-MM-DD
+    # date part — TODO confirm against the CSV).
+    df['dt2'] = df.apply(day_col, axis=1)
+    # Keep a single row per (dt2, kw) pair.
+    df = df.drop_duplicates(subset=['dt2','kw'])
+    # Left-hand part of the report: date, keyword and rank, ordered by date.
+    df_kw_rank = df[['dt2','kw','ranking']].sort_values(by='dt2')
+    df_kw_rank_2 = df_kw_rank.reset_index(drop=True)
+    df_kw_rank_2.columns = ['日期','關鍵字','名次']
+    print(df_kw_rank_2)
+    # df_=pd.DataFrame(columns=list('  '))
+    # print(pd.concat([df_kw_rank,df_]))
+    # One summary row per dt2: row count, the fixed rate 20, and count * 20.
+    data = []
+    num = df.groupby('dt2',as_index=False).size()
+    for index,row in num.iterrows():
+        data.append([row['dt2'],row['size'],20,row['size']*20])
+    df_first = pd.DataFrame(data,columns=['日期','關鍵字出現次數','首頁日費','首頁小計'])
+    # Top three: the same summary restricted to ranking <= 3, at rate 40.
+    df_top_3 = df.loc[df['ranking']<=3]
+    num_top_3 = df_top_3.groupby('dt2',as_index=False).size()
+    data_2 = []
+    for index,row in num_top_3.iterrows():
+        data_2.append([row['dt2'],row['size'],40,row['size']*40])
+    df_second = pd.DataFrame(data_2,columns=['日期','前3名字組數量','前3名字組日費','前3名字組小計'])
+    # Outer merge keeps dates that appear in only one summary; gaps become 0.
+    df_result = pd.merge(df_first,df_second,on='日期',how='outer').fillna(0)
+    # Side-by-side concatenation: rows are aligned by positional index, not by date.
+    new = pd.concat([df_kw_rank_2,df_result],axis=1)
+    # df_result.insert(0,'日期 ',df_kw_rank['日期'])
+    # df_result.insert(1,'關鍵字 ',df_kw_rank['關鍵字'])
+    # df_result.insert(2,'名次 ',df_kw_rank['名次'])
+    new.to_csv(f"/Users/zooeytsai/Documents/{name}5月績效報表2.csv",index=False)
+    # Pause 60 seconds before handling the next name.
+    time.sleep(60)

+ 148 - 0
SEO/ranking_day.py

@@ -0,0 +1,148 @@
+import sys
+import dataset
+from selenium import webdriver
+import traceback
+import datetime
+import codecs
+import time
+import urllib
+import argparse
+import logging
+import sys
+from logging.handlers import SysLogHandler
+import socket
+import pandas as pd
+import random
+from selenium.webdriver.chrome.service import Service
+import os
+from random import randint
+import pymysql
+pymysql.install_as_MySQLdb()
+
+
+# Raw string: '\p' and '\c' are not valid escape sequences, so the plain
+# literal triggers an invalid-escape warning on current Python versions.
+# The resulting value is unchanged.
+path = r'C:\portable\chromedriver'
+path_z = '/Users/zooeytsai/Downloads/chromedriver 2'
+# Shared Selenium driver; assigned in run_once() and used by process_one().
+driver = None
+# NOTE(review): the database credentials are hard-coded in this URL; consider
+# loading them from the environment or a config file instead.
+db = dataset.connect('mysql://choozmo:pAssw0rd@db.ptt.cx:3306/seo?charset=utf8mb4')
+# Filled further down with [term, domain] pairs.
+lst = []
+# Table that process_one() inserts matching results into.
+table = db['google_rank']
+
+
+def rua():
+    """Return a randomly chosen desktop browser User-Agent string.
+
+    Every pool entry is one complete User-Agent value; each entry must end
+    with a comma so adjacent literals are not silently concatenated.
+    """
+    pool = [
+        "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:73.0) Gecko/20100101 Firefox/73.0",
+        "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:76.0) Gecko/20100101 Firefox/76.0",
+        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/87.0.4280.88 Safari/537.36",
+        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/60.0.3112.101 Safari/537.36",
+        "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/78.0.3904.108 Safari/537.36",
+        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/83.0.4103.97 Safari/537.36",
+        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/81.0.4044.138 Safari/537.36 OPR/68.0.3618.125",
+    ]
+    return random.choice(pool)
+
+
+def process_one(item):
+    """Search Google for one term and record the results on the target domain.
+
+    Args:
+        item: Sequence whose first element is the search term and whose second
+            element is the domain substring to look for in result URLs.
+
+    Every result whose URL contains the domain is inserted into ``table``
+    with its 1-based position. When no result could be read at all, the
+    driver is closed and the process exits via ``sys.exit()``. Otherwise the
+    driver is closed and the function sleeps 90-120 seconds before returning.
+    """
+    global driver
+    # Local imports: the module only does `import urllib` (which does not
+    # guarantee the `parse` submodule) and never imports By.
+    import urllib.parse
+    from selenium.webdriver.common.by import By
+
+    term = item[0]
+    domain = item[1]
+    print(term, domain)
+
+    escaped_search_term = urllib.parse.quote(term)
+    googleurl = 'https://www.google.com/search?q={}&num={}&hl={}'.format(escaped_search_term, 100, 'zh-TW')
+    print(googleurl)
+    driver.get(googleurl)
+    time.sleep(6)
+
+    # find_elements_by_xpath() no longer exists in current Selenium 4
+    # releases; find_elements(By.XPATH, ...) is the supported form.
+    elmts = driver.find_elements(By.XPATH, "//div[@class='yuRUbf']/a")
+    cnt = 1
+    datadict = {'搜尋詞': [], '結果標題': [], '結果網址': [], '結果名次': []}
+
+    if len(elmts) == 0:
+        # No results at all: run the reboot script from /root.
+        print('chrome異常')
+        os.chdir('/root')
+        os.system('python3 reboot.py')
+
+    for elmt in elmts:
+        try:
+            href = elmt.get_attribute('href')
+
+            datadict['搜尋詞'].append(term)
+            datadict['結果標題'].append(elmt.text)
+            datadict['結果網址'].append(href)
+            datadict['結果名次'].append(str(cnt))
+            # href may be None; skip the membership test in that case.
+            if href and domain in href:
+                print(href)
+                print(elmt.text)
+                table.insert(
+                    {'title': elmt.text, 'url': href, 'keyword': term, 'dt': datetime.datetime.now(), 'num': cnt})
+            cnt += 1
+        except Exception:
+            print('href2 exception')
+            traceback.print_exc()
+    if len(datadict['結果標題']) <= 0:
+        print('None')
+        driver.quit()
+        sys.exit()
+
+    driver.quit()
+    print('中場休息')
+    time.sleep(randint(90, 120))
+
+
+def run_once(pport, item):
+    """Attach to Chrome on the given debugging port and process one item.
+
+    Args:
+        pport: Port interpolated into the ``debuggerAddress`` option as
+            ``127.0.0.1:<pport>``.
+        item: Passed unchanged to process_one().
+    """
+    global driver
+    s = Service('/root/driver/chromedriver')
+    user_agent = rua()
+    options = webdriver.ChromeOptions()
+    options.add_argument('--headless')
+    options.add_argument('--remote-debugging-port=9222')
+    options.add_experimental_option("debuggerAddress", f"127.0.0.1:{pport}")
+    options.add_argument("--user-agent=" + user_agent)
+    options.add_argument("--incognito")
+
+    driver = webdriver.Chrome(options=options, service=s)
+    try:
+        driver.delete_all_cookies()
+        driver.set_window_size(1400, 1000)
+
+        process_one(item)
+        time.sleep(3)
+    finally:
+        # Always end the session, even when process_one() raises or exits.
+        driver.quit()
+
+
+# Load every [term, domain] pair to be processed.
+cursor = db.query('select term,domain from seo.selected_kw')
+for c in cursor:
+    lst.append([c['term'], c['domain']])
+
+
+for i in lst:
+    print('這裡', i)
+    # Retry the same pair until one run completes without raising.
+    while True:
+        try:
+            os.system('docker container restart tiny9')
+            time.sleep(1)
+            run_once(9928, i)
+            print('docker開啟完成')
+            # Start from None so an empty google_rank table cannot leave kw
+            # unbound (that used to cause an endless NameError retry loop).
+            kw = None
+            cur = db.query('select * from seo.google_rank order by id  desc limit 1')
+            for c in cur:
+                kw = c['keyword']
+            if kw != i[0]:
+                print('稍等,上一筆待完成')
+                time.sleep(60)
+            break
+        except (Exception, SystemExit):
+            # SystemExit is listed explicitly so the sys.exit() raised by
+            # process_one() on an empty result page still leads to a retry,
+            # as before; KeyboardInterrupt is no longer swallowed.
+            traceback.print_exc()
+            os.system('docker container restart tiny9')
+            time.sleep(15)
+    print('等待進行下一個關鍵字')
+    time.sleep(5)

+ 129 - 0
SEO/ranking_day2.py

@@ -0,0 +1,129 @@
+from random import randint
+import sys
+import dataset
+from selenium import webdriver
+import traceback
+import datetime
+import codecs
+import time
+import urllib
+import argparse
+import logging
+import sys
+from logging.handlers import SysLogHandler
+import socket
+import pandas as pd
+import pymysql
+
+pymysql.install_as_MySQLdb()
+import random
+from selenium.webdriver.chrome.service import Service
+import os
+import fire
+
+# Raw string: '\p' and '\c' are not valid escape sequences, so the plain
+# literal triggers an invalid-escape warning on current Python versions.
+# The resulting value is unchanged.
+path = r'C:\portable\chromedriver'
+path_z = '/Users/zooeytsai/Downloads/chromedriver 2'
+# Shared Selenium driver; assigned in run_once() and used by process_one().
+driver = None
+# NOTE(review): the database credentials are hard-coded in this URL; consider
+# loading them from the environment or a config file instead.
+db = dataset.connect('mysql://choozmo:pAssw0rd@db.ptt.cx:3306/seo?charset=utf8mb4')
+lst = []
+# Table that process_one() inserts matching results into.
+table = db['google_rank']
+
+
+def rua():
+    """Return a randomly chosen desktop browser User-Agent string.
+
+    Every pool entry is one complete User-Agent value; each entry must end
+    with a comma so adjacent literals are not silently concatenated.
+    """
+    pool = [
+        "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:73.0) Gecko/20100101 Firefox/73.0",
+        "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:76.0) Gecko/20100101 Firefox/76.0",
+        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/87.0.4280.88 Safari/537.36",
+        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/60.0.3112.101 Safari/537.36",
+        "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/78.0.3904.108 Safari/537.36",
+        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/83.0.4103.97 Safari/537.36",
+        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/81.0.4044.138 Safari/537.36 OPR/68.0.3618.125",
+    ]
+    return random.choice(pool)
+
+
+def process_one(item):
+    """Search Google for one term and record the results on the target domain.
+
+    Args:
+        item: Sequence whose first element is the search term and whose second
+            element is the domain substring to look for in result URLs.
+
+    Every result whose URL contains the domain is inserted into ``table``
+    with its 1-based position. When no result could be read at all, the
+    driver is closed and the process exits via ``sys.exit()``. Otherwise the
+    driver is closed and the function sleeps 90-120 seconds before returning.
+    """
+    global driver
+    # Local imports: the module only does `import urllib` (which does not
+    # guarantee the `parse` submodule) and never imports By.
+    import urllib.parse
+    from selenium.webdriver.common.by import By
+
+    term = item[0]
+    domain = item[1]
+    print(term, domain)
+
+    escaped_search_term = urllib.parse.quote(term)
+    googleurl = 'https://www.google.com/search?q={}&num={}&hl={}'.format(escaped_search_term, 100, 'zh-TW')
+    print(googleurl)
+    driver.get(googleurl)
+    time.sleep(6)
+
+    # find_elements_by_xpath() no longer exists in current Selenium 4
+    # releases; find_elements(By.XPATH, ...) is the supported form.
+    elmts = driver.find_elements(By.XPATH, "//div[@class='yuRUbf']/a")
+    cnt = 1
+    datadict = {'搜尋詞': [], '結果標題': [], '結果網址': [], '結果名次': []}
+
+    for elmt in elmts:
+        try:
+            href = elmt.get_attribute('href')
+
+            datadict['搜尋詞'].append(term)
+            datadict['結果標題'].append(elmt.text)
+            datadict['結果網址'].append(href)
+            datadict['結果名次'].append(str(cnt))
+            # href may be None; skip the membership test in that case.
+            if href and domain in href:
+                print(href)
+                print(elmt.text)
+                table.insert(
+                    {'title': elmt.text, 'url': href, 'keyword': term, 'dt': datetime.datetime.now(), 'num': cnt})
+            cnt += 1
+        except Exception:
+            print('href2 exception')
+            traceback.print_exc()
+    if len(datadict['結果標題']) <= 0:
+        print('None')
+        driver.quit()
+        sys.exit()
+
+    driver.quit()
+    print('中場休息')
+    time.sleep(randint(90, 120))
+
+
+def run_once(q):
+    """Attach to Chrome on the given debugging port and process one query.
+
+    Args:
+        q: Tuple ``(kw, domain, port)``. ``port`` is interpolated into the
+            ``debuggerAddress`` option as ``127.0.0.1:<port>``; the whole tuple
+            is passed on to process_one().
+    """
+    global driver
+    s = Service('/root/driver/chromedriver')
+    user_agent = rua()
+    options = webdriver.ChromeOptions()
+    options.add_argument('--headless')
+    options.add_argument('--remote-debugging-port=9222')
+    options.add_experimental_option("debuggerAddress", f"127.0.0.1:{q[2]}")
+    options.add_argument("--user-agent=" + user_agent)
+    options.add_argument("--incognito")
+
+    driver = webdriver.Chrome(options=options, service=s)
+    try:
+        driver.delete_all_cookies()
+        driver.set_window_size(1400, 1000)
+
+        process_one(q)
+        time.sleep(3)
+    finally:
+        # Always end the session, even when process_one() raises or exits.
+        driver.quit()
+
+
+class JParams(object):
+    """Methods handed to ``fire`` as command-line sub-commands."""
+
+    def get(self, kw, domain, port):
+        """Print the keyword and domain, then run a single query for them."""
+        for value in (kw, domain):
+            print(value)
+        query = (kw, domain, port)
+        run_once(query)
+
+
+# When run as a script, hand the JParams methods to fire as CLI commands.
+if __name__ == '__main__':
+    fire.Fire(JParams)
+

+ 64 - 0
SEO/run_gsrack.py

@@ -0,0 +1,64 @@
+import sys
+import codecs
+import traceback
+import requests
+import re
+import pandas as pd
+import random
+import urllib
+import json
+import gspread
+import datetime
+from gspread_pandas import Spread, Client
+from oauth2client.service_account import ServiceAccountCredentials
+import os
+import redis
+import time
+import fire
+import dataset
+import pymysql
+pymysql.install_as_MySQLdb()
+
+
+def run_once(pport, dockername):
+    """Pick a random keyword for one client and run general_gsrack.py on it.
+
+    Args:
+        pport: Value forwarded to general_gsrack.py as ``--port``.
+        dockername: Not used by this function; kept so existing calls work.
+
+    Raises:
+        IndexError: If the keyword query returns no rows.
+        SystemExit: If ``os.system`` reports -1 for the child command.
+    """
+    # Local import: only needed here to quote the command line.
+    import shlex
+
+    db = dataset.connect('mysql://choozmo:pAssw0rd@db.ptt.cx:3306/seo?charset=utf8mb4')
+    cursor = db.query('SELECT * FROM seo.selected_kw where client="神助物流"')
+    lst = [[c['kw']] for c in cursor]
+
+    obj = random.choice(lst)
+    print(obj)
+    kw = obj[0]
+    domain = 'hhh.com.tw'
+    print(kw, domain)
+
+    # Quote every value so a keyword containing quotes, `$` or backticks
+    # cannot break or inject into the shell command.
+    cmd = 'python3 general_gsrack.py get --kw={} --domain={} --port={}'.format(
+        shlex.quote(str(kw)), shlex.quote(domain), shlex.quote(str(pport)))
+    intval = os.system(cmd)
+
+    print('執行genetal_gsrack')
+
+    if intval == -1:
+        print('-1')
+        sys.exit()
+
+
+class JParams(object):
+    """Methods handed to ``fire`` as command-line sub-commands."""
+
+    def get(self, port=9222):
+        """Restart the ``tiny9`` container and run one query, in an endless loop.
+
+        Args:
+            port: Accepted for the command line but not used.
+                NOTE(review): run_once() is always called with the hard-coded
+                9928 — confirm whether ``port`` should be forwarded instead.
+        """
+        while True:
+            try:
+                os.system('docker container restart tiny9')
+                time.sleep(10)
+                run_once(9928, 'tiny9')
+            except Exception:
+                # `except Exception` instead of a bare `except`, so that the
+                # sys.exit() in run_once() and Ctrl-C actually stop the loop.
+                traceback.print_exc()
+                os.system('docker container restart tiny9')
+                time.sleep(15)
+
+
+# When run as a script, hand the JParams methods to fire as CLI commands.
+if __name__ == '__main__':
+    fire.Fire(JParams)
+

+ 59 - 0
SEO/run_ranking_day.py

@@ -0,0 +1,59 @@
+import sys
+import random
+import os
+import time
+import fire
+import dataset
+from random import randint
+import pymysql
+pymysql.install_as_MySQLdb()
+
+def run_once(pport, dockername):
+    """Launch ranking_day.py for the keyword after the most recently stored one.
+
+    Args:
+        pport: Value forwarded to ranking_day.py as ``--port``.
+        dockername: Not used by this function; kept so existing calls work.
+
+    When no google_rank row is dated today, a fixed first keyword is run
+    before the list is scanned. The list from seo.selected_kw is then scanned
+    for the keyword of the newest google_rank row, and the entry following it
+    is run.
+
+    Raises:
+        SystemExit: If ``os.system`` reports -1 for a child command.
+    """
+    # Local import: only needed here to quote the command line.
+    import shlex
+
+    db = dataset.connect('mysql://choozmo:pAssw0rd@db.ptt.cx:3306/seo?charset=utf8mb4')
+    lst = [[c['term'], c['domain']] for c in db.query('select term,domain from seo.selected_kw')]
+
+    # Defaults keep kw / now_day_len / intval bound when a query returns no
+    # rows or no command gets launched (previously a NameError).
+    kw = None
+    for c in db.query('select * from seo.google_rank order by id  desc limit 1'):
+        kw = c['keyword']
+    now_day_len = 0
+    for c in db.query('select count(*) from seo.google_rank where CAST(dt AS DATE) = CAST( curdate() AS DATE)'):
+        now_day_len = c['count(*)']
+        print(now_day_len)
+
+    intval = 0
+    port_arg = shlex.quote(str(pport))
+    if now_day_len == 0:
+        print('首位')
+        intval = os.system(
+            f'python3 ranking_day.py get --kw="清原 中央" --domain="taroboba-yuan.com" --port={port_arg}')
+
+    # NOTE(review): ranking_day.py as shown in this commit has no fire entry
+    # point; ranking_day2.py does — confirm which script is meant here.
+    for pos, entry in enumerate(lst):
+        if entry[0] == kw:
+            if pos + 1 < len(lst):
+                next_kw, next_domain = lst[pos + 1]
+                # Quote the values so they cannot break the shell command.
+                intval = os.system(
+                    'python3 ranking_day.py get --kw={} --domain={} --port={}'.format(
+                        shlex.quote(str(next_kw)), shlex.quote(str(next_domain)), port_arg))
+            else:
+                # The stored keyword is the last entry: nothing follows it
+                # (this used to raise IndexError).
+                print('no keyword after', kw)
+
+        print('執行ranking_day.py')
+        if intval == -1:
+            print('-1')
+            sys.exit()
+
+
+class JParams(object):
+    """Retry wrapper around run_once()."""
+
+    def get(self, port=9222):
+        """Restart the ``tiny9`` container and call run_once() until it succeeds.
+
+        Args:
+            port: Accepted but not used.
+                NOTE(review): run_once() is always called with the hard-coded
+                9928 — confirm whether ``port`` should be forwarded instead.
+        """
+        # Local import: this module does not import traceback at the top.
+        import traceback
+
+        while True:
+            try:
+                os.system('docker container restart tiny9')
+                time.sleep(1)
+                run_once(9928,'tiny9')
+                time.sleep(20)
+                break
+            except Exception:
+                # `except Exception` instead of a bare `except`, so that the
+                # sys.exit() in run_once() and Ctrl-C actually stop the loop.
+                traceback.print_exc()
+                os.system('docker container restart tiny9')
+                time.sleep(15)
+
+
+# if __name__ == '__main__':
+#     run_once()
+

+ 61 - 0
monitor/monitor_chrome.py

@@ -0,0 +1,61 @@
+import sys
+from selenium import webdriver
+import urllib
+from selenium.webdriver.chrome.service import Service
+from selenium.webdriver.common.by import By
+import os
+import time
+
+def process_one():
+    """Run a fixed Google search and call reboot.py when it yields no results."""
+    global driver
+    # Local imports: the module only does `import urllib` (which does not
+    # guarantee the `parse` submodule) and does not import traceback.
+    import traceback
+    import urllib.parse
+
+    googleurl = 'https://www.google.com/search?q={}&num={}&hl={}'.format(urllib.parse.quote('風起'), 100,'zh-TW')
+    print(googleurl)
+    driver.get(googleurl)
+
+    # Retry a bounded number of times: the previous `while True` with a bare
+    # `except: pass` could spin forever and also swallowed Ctrl-C.
+    elmts = []
+    for _attempt in range(5):
+        try:
+            elmts = driver.find_elements(By.XPATH, "//div[@class='yuRUbf']/a")
+            print('尋找')
+            break
+        except Exception:
+            traceback.print_exc()
+            time.sleep(1)
+    print('搜尋結果數量',len(elmts))
+    # Both counters are locals reset on every call, so they only ever
+    # report 1; kept so the printed output stays the same.
+    n = 0
+    n_run = 0
+    if len(elmts) == 0:
+        n+=1
+        print('異常次數',n)
+        os.system('python3 reboot.py')
+    else:
+        print('正常')
+    n_run+=1
+    print('執行次數:',n_run)
+
+def run_once(pport):
+    """Attach to Chrome on the given debugging port and run one health check.
+
+    Args:
+        pport: Port interpolated into the ``debuggerAddress`` option as
+            ``127.0.0.1:<pport>``.
+    """
+    global driver
+    s = Service('/root/driver/chromedriver')
+    user_agent = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/81.0.4044.138 Safari/537.36 OPR/68.0.3618.125"
+    options = webdriver.ChromeOptions()
+    options.add_argument('--headless')
+    options.add_argument('--remote-debugging-port=9222')
+    options.add_experimental_option("debuggerAddress", f"127.0.0.1:{pport}")
+    options.add_argument("--user-agent=" + user_agent)
+    options.add_argument("--incognito")
+
+    driver = webdriver.Chrome(options=options, service=s)
+    try:
+        driver.delete_all_cookies()
+        driver.set_window_size(1400, 1000)
+
+        process_one()
+        time.sleep(3)
+    finally:
+        # Always end the session, even when process_one() raises.
+        driver.quit()
+   
+# Restart the container, then run one health check against port 9929.
+try:
+    os.system('docker container restart tiny10')
+    time.sleep(1)
+    run_once(9929)
+
+except Exception:
+    # `except Exception` instead of a bare `except`, so Ctrl-C and
+    # SystemExit are not swallowed; the failure is also logged now.
+    import traceback
+    traceback.print_exc()
+    # NOTE(review): the try branch restarts tiny10 but this branch restarts
+    # tiny9 — confirm which container is intended.
+    os.system('docker container restart tiny9')
+    time.sleep(15)