|
@@ -0,0 +1,120 @@
|
|
|
import codecs
import json
import os
import pprint
import re
import sys
import time

import dataset
import instaloader
from instaloader import Instaloader, Hashtag
|
|
|
# Connection URL for the tag store. Set IG_TAGS_DATABASE_URL to point the
# script at another database (for example 'sqlite:///:memory:' or
# 'sqlite:///c:/tmp/ig.db' for local runs) without editing the source.
# NOTE(review): the fallback below embeds a credential in the source file; it
# is kept only so existing runs behave exactly as before -- rotate it and drop
# the fallback once the environment variable is deployed.
db = dataset.connect(
    os.environ.get(
        'IG_TAGS_DATABASE_URL',
        'mysql://choozmo:pAssw0rd@db.ptt.cx:3306/seo?charset=utf8mb4',
    )
)

# Table handle used for every hashtag row written by this script.
table = db['ig_tags']
|
|
|
+
|
|
|
# Pattern used to pull hashtags out of caption text: a literal "#" followed by
# one captured group of 1-30 characters that starts and ends with a word
# character and may contain single (never consecutive) dots in between.
hashtag_regex = re.compile(
    r"(?:#)"
    r"(\w(?:(?:\w|(?:\.(?!\.))){0,28}(?:\w))?)"
)
|
|
|
+
|
|
|
def proc_tags(stmt):
    """Extract the hashtags from a caption and store them in the tag table.

    The caption is lower-cased and scanned with the module-level
    ``hashtag_regex``. Every match is printed and written to the module-level
    ``table`` as a row ``{'kw': <tag>, 'sid': ses_id, 'query': query}``, where
    ``ses_id`` and ``query`` are read from module scope.

    Args:
        stmt: Caption text to scan for hashtags.
    """
    rows = []
    for tag in hashtag_regex.findall(stmt.lower()):
        print(tag)
        rows.append({'kw': tag, 'sid': ses_id, 'query': query})

    # Write the whole caption in one batched insert instead of issuing one
    # INSERT per hashtag; captions without hashtags skip the database call.
    if rows:
        table.insert_many(rows)
|
|
|
+
|
|
|
def _fetch_hashtag_page(session, hashtag, max_id=None):
    """Request one page of the ``explore/tags/<hashtag>/`` JSON endpoint."""
    params = {"__a": 1}
    if max_id is not None:
        params["max_id"] = max_id
    return session.context.get_json(path="explore/tags/" + hashtag + "/", params=params)


def _record_captions(top_posts):
    """Pass the text of every caption found in *top_posts* to ``proc_tags``."""
    for post in top_posts['edges']:
        for caption in post['node']['edge_media_to_caption']['edges']:
            proc_tags(caption['node']['text'])


def search_hashtag(session, hashtag, extra_pages=3, delay=5, dump_path='c:/tmp/tags.txt'):
    """Fetch a hashtag's explore pages and record the hashtags in their captions.

    The first page is requested without a cursor; its raw JSON is written to
    *dump_path*, and the captions of its top posts are handed to ``proc_tags``.
    Up to *extra_pages* further pages are then requested, each one using the
    ``end_cursor`` of the previous response as ``max_id``.

    Args:
        session: Object whose ``context.get_json(path=..., params=...)``
            returns the decoded JSON of the endpoint.
        hashtag: Hashtag name, without the leading ``#``.
        extra_pages: Maximum number of follow-up pages to request.
        delay: Seconds to wait before each follow-up request.
        dump_path: File that receives the first page's JSON; a falsy value
            disables the dump.

    Returns:
        None. Results are stored as a side effect of ``proc_tags``.

    Raises:
        KeyError: If a response lacks the expected ``graphql``/``hashtag``
            structure.
    """
    json_data = _fetch_hashtag_page(session, hashtag)

    # Fetch first, then dump inside a context manager, so the file is always
    # closed and is not truncated when the request itself fails.
    if dump_path:
        with codecs.open(dump_path, 'w', 'utf-8') as fw:
            fw.write(json.dumps(json_data))

    hashtag_data = json_data['graphql']['hashtag']
    top_posts = hashtag_data['edge_hashtag_to_top_posts']
    print(top_posts)
    maxid = hashtag_data['edge_hashtag_to_media']["page_info"]["end_cursor"]
    print(maxid)
    _record_captions(top_posts)

    for _ in range(extra_pages):
        # No cursor means there is no further page; requesting again would
        # only repeat an earlier response.
        if not maxid:
            break
        time.sleep(delay)
        json_data = _fetch_hashtag_page(session, hashtag, maxid)
        hashtag_data = json_data['graphql']['hashtag']
        maxid = hashtag_data['edge_hashtag_to_media']["page_info"]["end_cursor"]
        print(maxid)
        # NOTE(review): follow-up pages still read 'edge_hashtag_to_top_posts'
        # although the cursor comes from 'edge_hashtag_to_media'; presumably
        # the paged media edges were meant here -- confirm before changing.
        _record_captions(hashtag_data['edge_hashtag_to_top_posts'])
|
|
|
+
|
|
|
+
|
|
|
# User-Agent string handed to Instaloader for every request of this run.
_USER_AGENT = (
    'Mozilla/5.0 (Linux; Android 9; KFONWI Build/PS7326.3183N; wv) '
    'AppleWebKit/537.36 (KHTML, like Gecko) Version/4.0 '
    'Chrome/106.0.5249.170 Safari/537.36 '
    'Instagram 236.0.0.20.109 Android '
    '(28/9; 213dpi; 800x1216; Amazon; KFONWI; onyx; mt8168; en_US; 371679860)'
)
L = instaloader.Instaloader(user_agent=_USER_AGENT)

# Identifier written to the 'sid' field of every row stored by this run.
ses_id = '198'

# Hashtag to crawl in this run. Search terms that were previously toggled
# here by (un)commenting, kept for reference:
#   補品, 滴雞精, 燉雞湯, 營養品, 胺基酸, 營養師, 營養補品, 營養補給, 粉光蔘,
#   調整體質, 天然漢方, 考生必備, 維他命, 礦物質, 西洋蔘, 補充體力, 補身,
#   營養成分, 飲食控制, 龜記, 鶴茶樓, 初韻, 醫療廢棄物, 飲料控
query = '心焙雞精'
|
|
|
# Stop here when rows for this search term already exist in the tag table.
# The row count comes from dataset's column filter instead of concatenating
# `query` into a SQL string, so quote characters in the term cannot break the
# statement, and the matching rows no longer have to be fetched and counted
# one by one on the client.
cnt = table.count(query=query)
if cnt > 0:
    sys.exit()
|
|
|
+
|
|
|
+
|
|
|
# Crawl the configured hashtag. search_hashtag() has no return statement, so
# `posts` is always None; the name is kept in case later code refers to it.
posts = search_hashtag(L, query)

# Report every hashtag stored under this session id, most frequent first.
# The session id is passed as a bound parameter rather than spliced into the
# SQL text.
cursor = db.query(
    'select kw,count(*) as cnt from ig_tags where sid = :sid '
    'group by kw order by count(*) desc',
    sid=ses_id,
)
for c in cursor:
    print(c['kw'])
    print(c['cnt'])
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+
|