zooey 2 年之前
父节点
当前提交
16807420de
共有 8 个文件被更改,包括 138 次插入和 18 次删除
  1. 4 3
      INNNews/run_sheet_2.py
  2. 5 5
      SEO/clickbot_100.py
  3. 3 3
      SEO/clickbot_100_one.py
  4. 2 3
      SEO/csv_to_sql.py
  5. 120 0
      SEO/ig_mysql.py
  6. 2 2
      SEO/ranking_report.py
  7. 1 1
      SEO/ranking_world.py
  8. 1 1
      SEO/sns_rank_report.py

+ 4 - 3
INNNews/run_sheet_2.py

@@ -44,8 +44,9 @@ def run_specific():
     lst = []
 
     ####形象SEO####
-    # cursor = db.query('select json from seo.seo_jobs where id between 810 and 830')
+    # cursor = db.query('select json from seo.seo_jobs where id between 937 and 946')
     # for c in cursor:
+    #     print(c['json'])
     #     js = json.loads(c['json'])
     #     prefix = js['prefix']
     #     postfix = js['postfix']
@@ -55,10 +56,10 @@ def run_specific():
     #     kw1 = random.choice(positive)
     #     kw2 = random.choice(rnd)
     #     kw = prefix + " " + kw1 + " " + kw2
-    #     print(kw, domain)
+    #     # print(kw, domain)
 
     ###形象SEO###
-    cursor = db.query('select term, domain from seo.selected_kw where client="極簡乳清"')
+    cursor = db.query('select term, domain from seo.selected_kw where client="plantA"')
     for c in cursor:
         lst.append([c['term'], c['domain']])
 

+ 5 - 5
SEO/clickbot_100.py

@@ -45,11 +45,11 @@ def process_one():
     db = dataset.connect('mysql://choozmo:pAssw0rd@db.ptt.cx:3306/seo?charset=utf8mb4')
     lst=[]
     table=db['google_rank']
-    # cursor = db.query('select term,domain from seo.selected_kw where client="soapdays"')
+    cursor = db.query('select term,domain from seo.selected_kw where client="CLIQ露營椅"')
     # cursor = db.query('select term,url from seo.sns_kw where client="英雄難國美人酒"')
-    # cursor = db.query('select term from seo.selected_kw where client="薇懠"')
+    # cursor = db.query('select term from seo.selected_kw where client="plantA"')
     # cursor = db.query('select term,domain from seo.select_kw where id between 2216 and 2255')
-    # cursor = db.query('select term,domain from seo.select_kw where client="純皂生活"')
+    # cursor = db.query('select term,domain from seo.select_kw where client="only實驗教育"')
     # cursor=db.query('select term from selected_kw and term not in (SELECT distinct(keyword) FROM ig_tags.save_result where url like "%beastparadise.net%" and datediff(now(),dt)=0)')
     for c in cursor:
         lst.append([c['term'],c['domain']])
@@ -63,7 +63,7 @@ def process_one():
         driver.get(googleurl)
         time.sleep(60)
         print(driver.current_url)
-        driver.save_screenshot('C:\/Users\/s1301\/Pictures\/Saved Pictures\/soapdays\/'+i[0]+'.png')
+        driver.save_screenshot('C:\/Users\/s1301\/Pictures\/Saved Pictures\/'+i[0]+'.png')
         df=pd.DataFrame()
         elmts=driver.find_elements(By.XPATH,"//div[@class='yuRUbf']/a")
         print('結果數量',len(elmts))
@@ -93,7 +93,7 @@ def process_one():
         df['結果網址']=datadict['結果網址']
         df['結果名次']=datadict['結果名次']
 
-        df.to_excel('C:\/Users\/s1301\/Pictures\/Saved Pictures\soapdays\/'+i[0]+".xls")
+        # df.to_excel('C:\/Users\/s1301\/Pictures\/Saved Pictures\soapdays\/'+i[0]+".xls")
 
         driver.quit()
         print('等待')

+ 3 - 3
SEO/clickbot_100_one.py

@@ -42,12 +42,12 @@ def restart_browser():
 
 def process_one():
     # lst=['信義房屋','信義 房屋','信義房仲','信義 房仲']
-    lst=['雙響泡','双響泡','雙響砲']
-    # lst = ['清原']
+    # lst=['雙響泡','双響泡','雙響砲']
+    lst = ['双響泡']
     # lst = ['龔國權']
     # lst = ['巨力搬家']
     # lst = ['飲料店加盟金比較','2023飲料店加盟','茶聚加盟']
-    date='0209'
+    date='0216'
     for term in lst:
         driver=restart_browser()
         escaped_search_term=urllib.parse.quote(term)

+ 2 - 3
SEO/csv_to_sql.py

@@ -11,8 +11,8 @@ table=db['seo_jobs']
 # table=db['selected_kw']
 # table=db['sns_kw']
 # table=db['select_kw']
-client='信義房屋'
-# domain='thesoapdays.com'
+client='清原'
+# domain='ionly.com.tw'
 # for index,row in df.iterrows():
 # with open("C:\/Users\/s1301\/Documents\/新飛國際遊學SEO - 關鍵字12.08.csv") as f:
 #     data_all = f.readlines()
@@ -27,7 +27,6 @@ df=df.fillna('')
 lst=[]
 for index,row in df.iterrows():
     # print(row)
-    # lst.append(row['kw'])
     prefix='"'+row['prefix']+'"'
     # positive='"'+row['positive']+'"'
     positive=row['positive']

+ 120 - 0
SEO/ig_mysql.py

@@ -0,0 +1,120 @@
+import re
+import instaloader
+import dataset
+import codecs
+import sys
+import pprint
+import json
+import time
+from instaloader import Instaloader, Hashtag
+#db = dataset.connect('sqlite:///:memory:)
+#db = dataset.connect('sqlite:///c:/tmp/ig.db')
+db = dataset.connect('mysql://choozmo:pAssw0rd@db.ptt.cx:3306/seo?charset=utf8mb4')
+
+table=db['ig_tags']
+
+hashtag_regex = re.compile(r"(?:#)(\w(?:(?:\w|(?:\.(?!\.))){0,28}(?:\w))?)")
+
+def proc_tags(stmt):
+    global ses_id
+    global query
+    metadata = dict(
+    hashtags = hashtag_regex.findall(stmt.lower()),
+    )
+#    print(metadata)``
+    for k,v in metadata.items():
+        for elmt in v:
+            print(elmt)
+            table.insert({'kw':elmt,'sid':ses_id,'query':query})
+#        print(v)
+#    print(metadata)
+
+def search_hashtag(session, hashtag):
+    fw=codecs.open('c:/tmp/tags.txt','w','utf-8')
+    jsonData = session.context.get_json(path="explore/tags/" + hashtag + "/", params={"__a": 1})
+    fw.write(json.dumps(jsonData))
+    fw.close()
+    
+    top_posts=jsonData['graphql']['hashtag']['edge_hashtag_to_top_posts']
+    print(top_posts)
+    maxid=jsonData['graphql']['hashtag']['edge_hashtag_to_media']["page_info"]["end_cursor"]
+    print(maxid)
+    print(maxid)
+    for e in top_posts['edges']:
+        for e2 in e['node']['edge_media_to_caption']['edges']:
+            proc_tags(e2['node']['text'])
+            
+
+    for i in range(3):
+        time.sleep(5)
+        jsonData = session.context.get_json(path="explore/tags/" + hashtag + "/", params={"__a": 1,"max_id":maxid})
+        top_posts=jsonData['graphql']['hashtag']['edge_hashtag_to_top_posts']
+
+        maxid=jsonData['graphql']['hashtag']['edge_hashtag_to_media']["page_info"]["end_cursor"]
+        print(maxid)
+        for e in top_posts['edges']:
+            for e2 in e['node']['edge_media_to_caption']['edges']:
+                proc_tags(e2['node']['text'])
+
+    hasNextPage = True
+    pageNumber = 1
+
+
+L = instaloader.Instaloader(user_agent='Mozilla/5.0 (Linux; Android 9; KFONWI Build/PS7326.3183N; wv) AppleWebKit/537.36 (KHTML, like Gecko) Version/4.0 Chrome/106.0.5249.170 Safari/537.36 Instagram 236.0.0.20.109 Android (28/9; 213dpi; 800x1216; Amazon; KFONWI; onyx; mt8168; en_US; 371679860)')
+
+
+ses_id='198'
+#query='補品'
+#query='滴雞精'
+#query='燉雞湯'
+#query='營養品'
+#query='胺基酸'
+#query='營養師'
+#query='營養補品'
+#query='營養補給'
+#query='粉光蔘'
+#query='調整體質'
+#query='天然漢方'
+#query='考生必備'
+##query='維他命'
+#query='礦物質'
+#query='西洋蔘'
+#query='補充體力'
+#query='補身'
+#query='營養成分'
+#query='飲食控制'
+query='心焙雞精'
+
+#query='龜記'
+#query='鶴茶樓'
+#query='初韻'
+#query='醫療廢棄物'
+#query='飲料控'
+cnt=0
+
+#cursor=db.query("SELECT name FROM sqlite_master WHERE type='table' AND name='tmp'")
+#for c in cursor:
+#    cnt+=1
+#if cnt>0:
+cursor=db.query('select query from ig_tags where query = "'+query+'" ')
+cnt=0
+for c in cursor:
+    cnt+=1
+if cnt>0:
+    sys.exit()
+#if len(cursor)<=0:
+#    sys.exit()
+
+
+posts = search_hashtag(L, query)
+
+#posts = search_hashtag(L, "50嵐")
+cursor=db.query('select kw,count(*) as cnt from ig_tags where sid="'+ses_id+'" group by kw order by count(*) desc')
+for c in cursor:
+    print(c['kw'])
+    print(c['cnt'])
+
+
+
+
+

+ 2 - 2
SEO/ranking_report.py

@@ -3,8 +3,8 @@ import dataset
 import pymysql
 pymysql.install_as_MySQLdb()
 
-name='護全'
-date = '0209'
+name='plantA'
+date = '0216'
 
 
 def day():

+ 1 - 1
SEO/ranking_world.py

@@ -241,7 +241,7 @@ d = {'ny':"https://www.google.com/search?q=angelo+koo&hl=en&gl=us&num=100&uule=w
      'chicago':"https://www.google.com/search?q=angelo+koo&hl=en&gl=us&num=100&uule=w+CAIQICIeQ2hpY2FnbyxJbGxpbm9pcyxVbml0ZWQgU3RhdGVz&gws_rd=cr"}
 
 location = 'chicago'
-run_once({'kw':'angelo koo','fname':location,'date':'0208','url':d[location]})
+run_once({'kw':'angelo koo','fname':location,'date':'0216','url':d[location]})
 
 ####手動截圖:須按右下角的設定選擇區域######
 ny="https://www.google.com/search?q=angelo+koo&hl=en&gl=us&num=100&uule=w+CAIQICIWTmV3IFlvcmssVW5pdGVkIFN0YXRlcw&gws_rd=cr"

+ 1 - 1
SEO/sns_rank_report.py

@@ -4,7 +4,7 @@ import pymysql
 pymysql.install_as_MySQLdb()
 
 name='理茶'
-date = '0209'
+date = '0216'
 
 
 def day():