
Merge branch 'master' of http://git.choozmo.com:3000/choozmo/kw_tools

Jason 2 years ago
parent commit 32db5c37e5

+ 50 - 69
INNNews/run_sheet_2.py

@@ -1,103 +1,84 @@
 import sys
 import codecs
 import traceback
-import requests
 import re
 import pandas as pd
 import random
-import urllib
-import json
-import gspread
-import datetime
-from gspread_pandas import Spread, Client
-from oauth2client.service_account import ServiceAccountCredentials
 import os
 import redis
 import time
+import json
 import fire
 import dataset
+import pymysql
+
+pymysql.install_as_MySQLdb()
 
 
-def run_once(pport, dockername):
+def run_once():
     db = dataset.connect('mysql://choozmo:pAssw0rd@db.ptt.cx:3306/seo?charset=utf8mb4')
-    lst = []
-    
-    cursor = db.query('select term, domain from seo.selected_kw where client not in ("毛怪","hhh")')
-    for c in cursor:
-        lst.append([c['term'], c['domain']])
-    #lst2=[['台灣仁本生命服務團隊','sctt.com.tw']]
+    lst = ['倉儲管理 神助移動貨架', '倉儲管理 神助貨架', '倉儲 管理']
+
+    # cursor = db.query('select term, domain from seo.selected_kw where client="仁本"')
+    # for c in cursor:
+    #     lst.append([c['term'], c['domain']])
+
     obj = random.choice(lst)
-    kw = obj[0]
-    domain = obj[1]
+    kw = obj
+    domain = 'jiatetea.tw'
     print(kw, domain)
-    db.close()
-    
-    intval = os.system(f'python3 general_clickbot_proxy.py get --kw="{kw}" --domain="{domain}" --port="{str(pport)}"')
-    
+    os.chdir('C:\/Users\/s1301\/PycharmProjects\/kw_tools\/INNNews')
+    intval = os.system(f'python3 general_clickbot_local.py get --kw="{kw}" --domain="{domain}"')
+
     print('執行完成genetal_clickbot')
-    
+
     if intval == -1:
         print('-1')
         sys.exit()
+    db.close()
+
 
-def run_specific(pport, dockername):
+def run_specific():
     db = dataset.connect('mysql://choozmo:pAssw0rd@db.ptt.cx:3306/seo?charset=utf8mb4')
+    # db_local = dataset.connect('mysql://root:jondae350@localhost/ig_tags')
     lst = []
-    
-    cursor = db.query('select term, domain from seo.selected_kw where client="神助物流"')
+
+    ####形象SEO####
+    # cursor = db.query('select json from seo.seo_jobs where id between 937 and 946')
+    # for c in cursor:
+    #     print(c['json'])
+    #     js = json.loads(c['json'])
+    #     prefix = js['prefix']
+    #     postfix = js['postfix']
+    #     domain = js['domain'][0]
+    #     positive = js['positive']
+    #     rnd = js['rnd']
+    #     kw1 = random.choice(positive)
+    #     kw2 = random.choice(rnd)
+    #     kw = prefix + " " + kw1 + " " + kw2
+    #     # print(kw, domain)
+
+    ###形象SEO###
+    cursor = db.query('select term, domain from seo.selected_kw where client="plantA"')
     for c in cursor:
         lst.append([c['term'], c['domain']])
-    db.close()
 
     obj = random.choice(lst)
-    print('隨機',obj)
-    #obj = ['貨架','gs-rack.com']
-    if obj[0]=='重型 架':
-        lst_1 = ['重型架 無軌式重型移動','重型架 無軌移動貨架','重型 架 神助物流']
-        kw = random.choice(lst_1)
-    elif obj[0]=='貨架':
-        lst_2=['貨架 無軌式重型移動','貨架 無軌式重型移動貨架','貨架 無軌移動貨架儲存方案']
-        kw = random.choice(lst_2)
-    #elif obj[0]=='倉儲 設備':
-    #    kw = '倉儲 設備'
-    #    print(kw)
-    else:
-        lst_3=[]
-        lst_3.append(obj[0]+' 神助物流')
-        lst_3.append(obj[0])
-        kw = random.choice(lst_3)
+    kw = obj[0]
     domain = obj[1]
-    print('執行的關鍵字',kw,domain)
+    print(kw, domain)
 
-    intval = os.system(f'python3 general_clickbot_proxy.py get --kw="{kw}" --domain="{domain}" --port="{str(pport)}"')    
-    print('執行完成神助genetal_clickbot')
+    intval = os.system(f'python3 general_clickbot_local.py get --kw="{kw}" --domain="{domain}"')
+    print('執行完成genetal_clickbot')
 
     if intval == -1:
         print('-1')
         sys.exit()
+    db.close()
+
+# run_specific()
+while True:
+    # run_once()
+    run_specific()
+    time.sleep(random.randint(120, 150))
 
-class JParams(object):
-    
-    def get(self, port=9222):
-        while True:
-            try:
-                os.system('docker container restart tiny1')
-                os.system('docker container restart tiny2')
-                os.system('docker container restart tiny3')
-                os.system('docker container restart tiny8')
-                #os.system('docker container restart tiny10')
-                time.sleep(1)
-                run_once(9922, 'tiny1')
-                run_once(9923, 'tiny2')
-                run_once(9924, 'tiny3')
-                #run_once(9930, 'tiny10')  
-                run_specific(9929, 'tiny8')
-                time.sleep(20)
-                break           
-            except:
-                os.system('docker container restart tiny8')
-                time.sleep(15)
-
-
-if __name__ == '__main__':
-    fire.Fire(JParams)

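Note on the run_sheet scripts above: the os.system(f'python3 general_clickbot_local.py get --kw="{kw}" --domain="{domain}"') calls interpolate the keyword straight into a shell string, which breaks if a term ever contains a double quote. A minimal quoting-safe sketch, assuming general_clickbot_local.py accepts the same fire-style flags:

    import subprocess
    import sys

    def run_clickbot(kw, domain):
        # list-form arguments avoid shell quoting issues with kw/domain;
        # returns the child's exit code, analogous to checking os.system's result
        proc = subprocess.run(
            [sys.executable, 'general_clickbot_local.py', 'get',
             f'--kw={kw}', f'--domain={domain}'])
        return proc.returncode
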
+ 20 - 3
INNNews/run_sheet_local.py

@@ -7,6 +7,7 @@ import random
 import os
 import redis
 import time
+import json
 import fire
 import dataset
 import pymysql
@@ -40,7 +41,22 @@ def run_specific():
     # db_local = dataset.connect('mysql://root:jondae350@localhost/ig_tags')
     lst = []
 
-    cursor = db.query('select term, domain from seo.selected_kw where client="極簡乳清"')
+    ####形象SEO####
+    # cursor = db.query('select json from seo.seo_jobs where id between 770 and 770 order by rand() limit 1')
+    # for c in cursor:
+    #     js = json.loads(c['json'])
+    #     prefix = js['prefix']
+    #     postfix = js['postfix']
+    #     domain = js['domain'][0]
+    #     positive = js['positive']
+    #     rnd = js['rnd']
+    #
+    # kw1 = random.choice(positive)
+    # kw2 = random.choice(rnd)
+    # kw = prefix + " " + kw1 + " " + kw2
+    # print(kw,domain)
+    ###形象SEO###
+    cursor = db.query('select term, domain from seo.selected_kw where client="美麗馨"')
     for c in cursor:
         lst.append([c['term'], c['domain']])
 
@@ -48,7 +64,7 @@ def run_specific():
     kw = obj[0]
     domain = obj[1]
     print(kw, domain)
-    # os.chdir('/Users/zooeytsai/kw_tools/INNNews')
+
     intval = os.system(f'python3 general_clickbot_local.py get --kw="{kw}" --domain="{domain}"')
     print('執行完成genetal_clickbot')
     
@@ -60,4 +76,5 @@ def run_specific():
 while True:
     # run_once()
     run_specific()
-    time.sleep(random.randint(120,150))
+    time.sleep(random.randint(120,150))
+

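The commented-out 形象SEO blocks in both run_sheet files rebuild a keyword from a seo_jobs JSON row. A sketch of that composition as a helper, with field names taken from the commented code (untested against the live table):

    import json
    import random

    def compose_kw(row_json):
        # prefix/positive/rnd/domain are the fields written by csv_to_sql.py
        js = json.loads(row_json)
        kw = js['prefix'] + ' ' + random.choice(js['positive']) + ' ' + random.choice(js['rnd'])
        return kw, js['domain'][0]
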
+ 10 - 10
SEO/clickbot_100.py

@@ -25,7 +25,7 @@ driver = None
 def restart_browser():
     global driver
     user_agent = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/87.0.4280.88 Safari/537.36'
-    s = Service('C:\/Users\/s1301\/Downloads\/chromedriver_107\/chromedriver')
+    s = Service('C:\/Users\/s1301\/Downloads\/chromedriver_109\/chromedriver')
     options = webdriver.ChromeOptions()
     options.add_argument('--headless')
     # options.add_argument('--remote-debugging-port=9222')
@@ -45,16 +45,16 @@ def process_one():
     db = dataset.connect('mysql://choozmo:pAssw0rd@db.ptt.cx:3306/seo?charset=utf8mb4')
     lst=[]
     table=db['google_rank']
-    cursor = db.query('select term,domain from seo.selected_kw where client="misa"')
-    # cursor = db.query('select term,url from seo.sns_kw where client="loginheart"')
-    # cursor = db.query('select term from seo.selected_kw where client="鑫富"')
-    # cursor = db.query('select term,domain from seo.select_kw where id between 1148 and 1155')
-    # cursor = db.query('select term,domain from seo.select_kw where client="有夠讚"')
+    cursor = db.query('select term,domain from seo.selected_kw where client="CLIQ露營椅"')
+    # cursor = db.query('select term,url from seo.sns_kw where client="英雄難國美人酒"')
+    # cursor = db.query('select term from seo.selected_kw where client="plantA"')
+    # cursor = db.query('select term,domain from seo.select_kw where id between 2216 and 2255')
+    # cursor = db.query('select term,domain from seo.select_kw where client="only實驗教育"')
     # cursor=db.query('select term from selected_kw and term not in (SELECT distinct(keyword) FROM ig_tags.save_result where url like "%beastparadise.net%" and datediff(now(),dt)=0)')
     for c in cursor:
         lst.append([c['term'],c['domain']])
-
-    # domain = 'yogoclean.com'
+        # lst.append(c['term'])
+    # domain = 'pinews.asia'
     for i in lst:
         print(i)
         driver=restart_browser()
@@ -63,7 +63,7 @@ def process_one():
         driver.get(googleurl)
         time.sleep(60)
         print(driver.current_url)
-        driver.save_screenshot('C:\/Users\/s1301\/Pictures\/Saved Pictures\/misa\/'+i[0]+'.png')
+        driver.save_screenshot('C:\/Users\/s1301\/Pictures\/Saved Pictures\/'+i[0]+'.png')
         df=pd.DataFrame()
         elmts=driver.find_elements(By.XPATH,"//div[@class='yuRUbf']/a")
         print('結果數量',len(elmts))
@@ -93,7 +93,7 @@ def process_one():
         df['結果網址']=datadict['結果網址']
         df['結果名次']=datadict['結果名次']
 
-        df.to_excel('C:\/Users\/s1301\/Pictures\/Saved Pictures\/misa\/'+i[0]+".xls")
+        # df.to_excel('C:\/Users\/s1301\/Pictures\/Saved Pictures\/soapdays\/'+i[0]+".xls")
 
         driver.quit()
         print('等待')

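For context on clickbot_100.py: the datadict columns assigned above ('關鍵字', '結果網址', '結果名次') are presumably filled by walking the yuRUbf anchors; a hypothetical reconstruction of that loop (the real one sits outside this hunk):

    # sketch only; assumes i = [term, domain] and elmts from the XPath query above
    datadict = {'關鍵字': [], '結果網址': [], '結果名次': []}
    for idx, elmt in enumerate(elmts, start=1):
        href = elmt.get_attribute('href')
        datadict['關鍵字'].append(i[0])
        datadict['結果網址'].append(href)
        datadict['結果名次'].append(str(idx))
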
+ 6 - 3
SEO/clickbot_100_one.py

@@ -25,7 +25,7 @@ driver = None
 def restart_browser():
     global driver
     user_agent = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/87.0.4280.88 Safari/537.36'
-    s = Service('C:\/Users\/s1301\/Downloads\/chromedriver_107\/chromedriver')
+    s = Service('C:\/Users\/s1301\/Downloads\/chromedriver_109\/chromedriver')
     options = webdriver.ChromeOptions()
     options.add_argument('--headless')
     # options.add_argument('--remote-debugging-port=9222')
@@ -42,9 +42,12 @@ def restart_browser():
 
 def process_one():
     # lst=['信義房屋','信義 房屋','信義房仲','信義 房仲']
-    lst=['台北 完美電波']
+    # lst=['雙響泡','双響泡','雙響砲']
+    lst = ['双響泡']
     # lst = ['龔國權']
-    date=''
+    # lst = ['巨力搬家']
+    # lst = ['飲料店加盟金比較','2023飲料店加盟','茶聚加盟']
+    date='0216'
     for term in lst:
         driver=restart_browser()
         escaped_search_term=urllib.parse.quote(term)

+ 19 - 21
SEO/csv_to_sql.py

@@ -7,40 +7,38 @@ pymysql.install_as_MySQLdb()
 
 
 db = dataset.connect('mysql://choozmo:pAssw0rd@db.ptt.cx:3306/seo?charset=utf8mb4')
-# table=db['seo_jobs']
-table=db['selected_kw']
+table=db['seo_jobs']
+# table=db['selected_kw']
 # table=db['sns_kw']
 # table=db['select_kw']
-client='美麗馨'
-domain='lisinbeauty.com'
+client='清原'
+# domain='ionly.com.tw'
 # for index,row in df.iterrows():
 # with open("C:\/Users\/s1301\/Documents\/新飛國際遊學SEO - 關鍵字12.08.csv") as f:
 #     data_all = f.readlines()
 # print(data_all)
 f = open("C:\/Users\/s1301\/Documents\/456.csv",encoding='utf-8')
 # df = pd.read_csv(f,header=None, names=['kw', 'url'])
-df = pd.read_csv(f,header=None, names=['kw'])
-# df = pd.read_csv(f,header=None, names=['prefix','id', 'positive','domain','rnd'])
+# df = pd.read_csv(f,header=None, names=['kw'])
+df = pd.read_csv(f,header=None, names=['prefix','id', 'positive','domain','rnd'])
 df=df.fillna('')
 # print(df)
 # url='fflc.tw'
 lst=[]
 for index,row in df.iterrows():
-    # print(row['kw'])
-    # lst.append(row['kw'])
-    # prefix='"'+row['prefix']+'"'
-    # # positive='"'+row['positive']+'"'
-    # positive=row['positive']
-    # domain='"'+row['domain']+'"'
-    # rnd='"'+str(row['rnd'])+'"'
-    # postfix='""'
-    # id=row['id']
-    # data = f'"id":{id},"prefix":{prefix},"domain":[{domain}],"postfix":{postfix},"positive":[{positive}],"rnd":[{rnd}]'
-    # json='{'+data+'}'
-    # print(json)
-    #
-    # table.insert({'cust':client,'plan':'形象SEO','json':json})
-    table.insert({'term':row['kw'],'client':client,'domain':domain})
+    # print(row)
+    prefix='"'+row['prefix']+'"'
+    # positive='"'+row['positive']+'"'
+    positive=row['positive']
+    domain='"'+row['domain']+'"'
+    rnd='"'+str(row['rnd'])+'"'
+    postfix='""'
+    id=row['id']
+    data = f'"id":{id},"prefix":{prefix},"domain":[{domain}],"postfix":{postfix},"positive":[{positive}],"rnd":[{rnd}]'
+    json='{'+data+'}'
+    print(json)
+    table.insert({'cust':client,'plan':'形象SEO','json':json})
+    # table.insert({'term':row['kw'],'client':client,'domain':domain})
     # table.insert({'term': row['kw'], 'client': client, 'url': row['url']})
 print(lst)
 {"prefix": "護佳", "postfix": "", "domain": ["www.hujiai-international.com"], "positive": ["細胞食物","紅蔘"], "rnd": [""]}

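The seo_jobs payload in csv_to_sql.py is assembled by string concatenation, which produces invalid JSON if any CSV field contains a double quote. A sketch using json.dumps instead; it assumes each positive/rnd cell holds a single value (the hand-built string lets one cell carry several pre-quoted items), and inside the real loop the local variable named json would need renaming first so it stops shadowing the module:

    import json

    payload = {
        'id': int(row['id']),
        'prefix': row['prefix'],
        'domain': [row['domain']],
        'postfix': '',
        'positive': [row['positive']],  # single-value cell assumed
        'rnd': [str(row['rnd'])],
    }
    # ensure_ascii=False keeps the Chinese terms readable in the DB column
    table.insert({'cust': client, 'plan': '形象SEO', 'json': json.dumps(payload, ensure_ascii=False)})
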
+ 120 - 0
SEO/ig_mysql.py

@@ -0,0 +1,120 @@
+import re
+import instaloader
+import dataset
+import codecs
+import sys
+import pprint
+import json
+import time
+from instaloader import Instaloader, Hashtag
+#db = dataset.connect('sqlite:///:memory:')
+#db = dataset.connect('sqlite:///c:/tmp/ig.db')
+db = dataset.connect('mysql://choozmo:pAssw0rd@db.ptt.cx:3306/seo?charset=utf8mb4')
+
+table=db['ig_tags']
+
+hashtag_regex = re.compile(r"(?:#)(\w(?:(?:\w|(?:\.(?!\.))){0,28}(?:\w))?)")
+
+def proc_tags(stmt):
+    global ses_id
+    global query
+    metadata = dict(
+    hashtags = hashtag_regex.findall(stmt.lower()),
+    )
+#    print(metadata)
+    for k,v in metadata.items():
+        for elmt in v:
+            print(elmt)
+            table.insert({'kw':elmt,'sid':ses_id,'query':query})
+#        print(v)
+#    print(metadata)
+
+def search_hashtag(session, hashtag):
+    fw=codecs.open('c:/tmp/tags.txt','w','utf-8')
+    jsonData = session.context.get_json(path="explore/tags/" + hashtag + "/", params={"__a": 1})
+    fw.write(json.dumps(jsonData))
+    fw.close()
+    
+    top_posts=jsonData['graphql']['hashtag']['edge_hashtag_to_top_posts']
+    print(top_posts)
+    maxid=jsonData['graphql']['hashtag']['edge_hashtag_to_media']["page_info"]["end_cursor"]
+    print(maxid)
+    print(maxid)
+    for e in top_posts['edges']:
+        for e2 in e['node']['edge_media_to_caption']['edges']:
+            proc_tags(e2['node']['text'])
+            
+
+    for i in range(3):
+        time.sleep(5)
+        jsonData = session.context.get_json(path="explore/tags/" + hashtag + "/", params={"__a": 1,"max_id":maxid})
+        top_posts=jsonData['graphql']['hashtag']['edge_hashtag_to_top_posts']
+
+        maxid=jsonData['graphql']['hashtag']['edge_hashtag_to_media']["page_info"]["end_cursor"]
+        print(maxid)
+        for e in top_posts['edges']:
+            for e2 in e['node']['edge_media_to_caption']['edges']:
+                proc_tags(e2['node']['text'])
+
+    hasNextPage = True
+    pageNumber = 1
+
+
+L = instaloader.Instaloader(user_agent='Mozilla/5.0 (Linux; Android 9; KFONWI Build/PS7326.3183N; wv) AppleWebKit/537.36 (KHTML, like Gecko) Version/4.0 Chrome/106.0.5249.170 Safari/537.36 Instagram 236.0.0.20.109 Android (28/9; 213dpi; 800x1216; Amazon; KFONWI; onyx; mt8168; en_US; 371679860)')
+
+
+ses_id='198'
+#query='補品'
+#query='滴雞精'
+#query='燉雞湯'
+#query='營養品'
+#query='胺基酸'
+#query='營養師'
+#query='營養補品'
+#query='營養補給'
+#query='粉光蔘'
+#query='調整體質'
+#query='天然漢方'
+#query='考生必備'
+##query='維他命'
+#query='礦物質'
+#query='西洋蔘'
+#query='補充體力'
+#query='補身'
+#query='營養成分'
+#query='飲食控制'
+query='心焙雞精'
+
+#query='龜記'
+#query='鶴茶樓'
+#query='初韻'
+#query='醫療廢棄物'
+#query='飲料控'
+cnt=0
+
+#cursor=db.query("SELECT name FROM sqlite_master WHERE type='table' AND name='tmp'")
+#for c in cursor:
+#    cnt+=1
+#if cnt>0:
+cursor=db.query('select query from ig_tags where query = "'+query+'" ')
+cnt=0
+for c in cursor:
+    cnt+=1
+if cnt>0:
+    sys.exit()
+#if len(cursor)<=0:
+#    sys.exit()
+
+
+posts = search_hashtag(L, query)
+
+#posts = search_hashtag(L, "50嵐")
+cursor=db.query('select kw,count(*) as cnt from ig_tags where sid="'+ses_id+'" group by kw order by count(*) desc')
+for c in cursor:
+    print(c['kw'])
+    print(c['cnt'])
+
+
+
+
+

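The duplicate check in ig_mysql.py concatenates query into the SQL string. dataset hands string queries to SQLAlchemy, which supports :name bind parameters; a sketch of the same check with a bound parameter (assuming the installed dataset version wraps strings in text(), as recent releases do):

    # parameterized form of: select query from ig_tags where query = "<query>"
    cursor = db.query('select 1 from ig_tags where query = :q limit 1', q=query)
    if any(True for _ in cursor):
        sys.exit()
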
+ 3 - 3
SEO/month_kw_rank.py

@@ -6,9 +6,9 @@ def day_col(row):
     return result
 
 
-custom_name = ['瑞福']
+custom_name = ['鑫富']
 for name in custom_name:
-    df = pd.read_csv(f"C:\\Users\\s1301\\Documents\\12月{name}排名前10.csv",encoding='utf-8')
+    df = pd.read_csv(f"C:\\Users\\s1301\\Documents\\1月{name}排名前10.csv",encoding='utf-8')
     df['dt2'] = df.apply(day_col, axis=1)
     df = df.drop_duplicates(subset=['dt2','kw'])
     df_kw_rank = df[['dt2','kw','ranking']].sort_values(by='dt2')
@@ -34,4 +34,4 @@ for name in custom_name:
     # df_result.insert(0,'日期 ',df_kw_rank['日期'])
     # df_result.insert(1,'關鍵字 ',df_kw_rank['關鍵字'])
     # df_result.insert(2,'名次 ',df_kw_rank['名次'])
-    new.to_csv(f"C:\/Users\/s1301\/Documents\/{name}12月績效報表.csv",index=False)
+    new.to_csv(f"C:\/Users\/s1301\/Documents\/{name}1月績效報表.csv",index=False)

+ 2 - 2
SEO/ranking_report.py

@@ -3,8 +3,8 @@ import dataset
 import pymysql
 pymysql.install_as_MySQLdb()
 
-name='瑞福'
-date = '0110'
+name='plantA'
+date = '0216'
 
 
 def day():

+ 6 - 1
SEO/ranking_world.py

@@ -235,8 +235,13 @@ def run_once(jsobj):
     driver.quit()
     sys.exit()
 
+d = {'ny':"https://www.google.com/search?q=angelo+koo&hl=en&gl=us&num=100&uule=w+CAIQICIWTmV3IFlvcmssVW5pdGVkIFN0YXRlcw&gws_rd=cr",
+     'wc':"https://www.google.com/search?q=angelo%20koo&hl=en&gl=us&num=100&uule=w+CAIQICItV2FzaGluZ3RvbixEaXN0cmljdCBvZiBDb2x1bWJpYSxVbml0ZWQgU3RhdGVz&gws_rd=cr#gws_rd=cr&ip=1",
+     'miami':"https://www.google.com/search?q=angelo+koo&hl=en&gl=us&num=100&uule=w+CAIQICIbTWlhbWksRmxvcmlkYSxVbml0ZWQgU3RhdGVz&gws_rd=cr",
+     'chicago':"https://www.google.com/search?q=angelo+koo&hl=en&gl=us&num=100&uule=w+CAIQICIeQ2hpY2FnbyxJbGxpbm9pcyxVbml0ZWQgU3RhdGVz&gws_rd=cr"}
 
-run_once({'kw':'angelo koo','fname':'chicago','date':'0112','url':'https://www.google.com/search?q=angelo+koo&hl=en&gl=us&num=100&uule=w+CAIQICIeQ2hpY2FnbyxJbGxpbm9pcyxVbml0ZWQgU3RhdGVz&gws_rd=cr'})
+location = 'chicago'
+run_once({'kw':'angelo koo','fname':location,'date':'0216','url':d[location]})
 
 ####手動截圖:須按右下角的設定選擇區域######
 ny="https://www.google.com/search?q=angelo+koo&hl=en&gl=us&num=100&uule=w+CAIQICIWTmV3IFlvcmssVW5pdGVkIFN0YXRlcw&gws_rd=cr"

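The uule values in ranking_world.py follow the commonly documented Google location encoding: a fixed w+CAIQICI prefix, one character from a key table at the index equal to the canonical place name's length, then the base64 of that name. A sketch (the key table is community-documented, not taken from this repo):

    import base64

    # index-by-length key table used in public uule write-ups
    KEY = 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-_'

    def make_uule(canonical_name):
        # 'Chicago,Illinois,United States' is 30 chars -> KEY[30] == 'e',
        # matching the 'chicago' entry in d above
        encoded = base64.b64encode(canonical_name.encode()).decode()
        return 'w+CAIQICI' + KEY[len(canonical_name)] + encoded
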
+ 2 - 2
SEO/sns_rank_report.py

@@ -3,8 +3,8 @@ import dataset
 import pymysql
 pymysql.install_as_MySQLdb()
 
-name='superlink'
-date = '0109'
+name='理茶'
+date = '0216'
 
 
 def day():