|
@@ -0,0 +1,63 @@
|
|
|
+import re
|
|
|
+import instaloader
|
|
|
+import pymysql
|
|
|
+import sqlite3
|
|
|
+
|
|
|
# Captures the tag text that follows a "#": it starts with a word character,
# may continue with word characters or single (non-consecutive) dots, must end
# on a word character, and is at most 30 characters long.
hashtag_regex = re.compile(r"(?:#)(\w(?:(?:\w|(?:\.(?!\.))){0,28}(?:\w))?)")


def search_hashtag(session, hashtag):
    """Yield hashtag metadata for every recent post listed under *hashtag*.

    Pages through ``explore/tags/<hashtag>/`` via
    ``session.context.get_json`` for as long as the response reports
    ``more_available``, passing ``next_max_id`` as ``max_id`` for each
    follow-up page.

    Args:
        session: Object exposing ``context.get_json(path=..., params=...)``,
            e.g. an ``instaloader.Instaloader`` instance.
        hashtag: Tag to look up, without the leading ``#``.

    Yields:
        dict: ``{"hashtags": [...]}`` holding the lower-cased hashtags found
        in the post's caption. A post without a caption yields an empty list.

    Raises:
        KeyError: If a response lacks the expected
            ``data -> recent -> sections / more_available`` structure.
    """
    path = "explore/tags/" + hashtag + "/"
    params = {"__a": 1}

    while True:
        response = session.context.get_json(path=path, params=params)
        recent = response['data']['recent']

        for section in recent['sections']:
            for post in section['layout_content']['medias']:
                # A missing or null caption is treated as empty text so one
                # caption-less post does not abort the whole iteration.
                caption_node = post['media'].get('caption') or {}
                caption = caption_node.get('text') or ""
                # Yield the metadata (the upstream original printed it).
                yield {"hashtags": hashtag_regex.findall(caption.lower())}

        if not recent['more_available']:
            break
        # Ask for the page that follows the one just consumed.
        params = {"__a": 1, "max_id": recent['next_max_id']}
|
|
|
+
|
|
|
+
|
|
|
# Script: log in to Instagram, collect the hashtags used in recent posts for
# one tag, and bulk-insert them into the MySQL table `ig`.
#
# NOTE(review): the Instagram and database credentials below are hard-coded
# in source; they should be loaded from configuration or the environment.
L = instaloader.Instaloader()
L.login('username','password')

db = pymysql.connect(host='localhost',
                     user='root',
                     password='jondae350',
                     database='ig_tags')

# try/finally guarantees the connection is closed even if scraping or the
# insert raises part-way through.
try:
    cur = db.cursor()
    # The column list must be closed before VALUES; without the closing
    # parenthesis MySQL rejects the statement with a syntax error.
    sql = "INSERT INTO ig (idnew_table, pimples) VALUES (%s, %s)"
    all_tags = []

    posts = search_hashtag(L, "粉刺")
    c = 0
    for post in posts:
        print(post)
        for t in post['hashtags']:
            all_tags.append((c, t))
            # NOTE(review): the counter advances once per tag so every row
            # gets a distinct idnew_table value; the original nesting of this
            # increment was not recoverable — confirm against the schema.
            c += 1

    # One (id, tag) row per collected hashtag, sent as a single batch.
    cur.executemany(sql, all_tags)
    db.commit()
finally:
    db.close()
|
|
|
+
|
|
|
+
|
|
|
+
|