# ig_mysql.py
# Collect hashtags from Instagram hashtag pages and store them in the MySQL table ``ig_tags``.
import codecs
import json
import os
import pprint
import re
import sys
import time

import dataset
import instaloader
import pymysql
from instaloader import Instaloader, Hashtag

pymysql.install_as_MySQLdb()
  12. #db = dataset.connect('sqlite:///:memory:)
  13. #db = dataset.connect('sqlite:///c:/tmp/ig.db')
  14. db = dataset.connect('mysql://choozmo:pAssw0rd@db.ptt.cx:3306/seo?charset=utf8mb4')
  15. table=db['ig_tags']
  16. hashtag_regex = re.compile(r"(?:#)(\w(?:(?:\w|(?:\.(?!\.))){0,28}(?:\w))?)")
  17. def proc_tags(stmt):
  18. global ses_id
  19. global query
  20. metadata = dict(
  21. hashtags = hashtag_regex.findall(stmt.lower()),
  22. )
  23. # print(metadata)``
  24. for k,v in metadata.items():
  25. for elmt in v:
  26. print(elmt)
  27. table.insert({'kw':elmt,'sid':ses_id,'query':query})
  28. # print(v)
  29. # print(metadata)
  30. def search_hashtag(session, hashtag):
  31. fw=codecs.open('C:\/Users\/s1301\/Downloads\/tags.txt','w','utf-8')
  32. jsonData = session.context.get_json(path="explore/tags/" + hashtag + "/", params={"__a": 1})
  33. fw.write(json.dumps(jsonData))
  34. fw.close()
  35. top_posts=jsonData['graphql']['hashtag']['edge_hashtag_to_top_posts']
  36. print(top_posts)
  37. maxid=jsonData['graphql']['hashtag']['edge_hashtag_to_media']["page_info"]["end_cursor"]
  38. print(maxid)
  39. print(maxid)
  40. for e in top_posts['edges']:
  41. for e2 in e['node']['edge_media_to_caption']['edges']:
  42. proc_tags(e2['node']['text'])
  43. for i in range(3):
  44. time.sleep(5)
  45. jsonData = session.context.get_json(path="explore/tags/" + hashtag + "/", params={"__a": 1,"max_id":maxid})
  46. top_posts=jsonData['graphql']['hashtag']['edge_hashtag_to_top_posts']
  47. maxid=jsonData['graphql']['hashtag']['edge_hashtag_to_media']["page_info"]["end_cursor"]
  48. print(maxid)
  49. for e in top_posts['edges']:
  50. for e2 in e['node']['edge_media_to_caption']['edges']:
  51. proc_tags(e2['node']['text'])
  52. hasNextPage = True
  53. pageNumber = 1
  54. L = instaloader.Instaloader(user_agent='Mozilla/5.0 (Linux; Android 9; KFONWI Build/PS7326.3183N; wv) AppleWebKit/537.36 (KHTML, like Gecko) Version/4.0 Chrome/106.0.5249.170 Safari/537.36 Instagram 236.0.0.20.109 Android (28/9; 213dpi; 800x1216; Amazon; KFONWI; onyx; mt8168; en_US; 371679860)')
  55. ses_id='198'
  56. #query='補品'
  57. #query='滴雞精'
  58. #query='燉雞湯'
  59. #query='營養品'
  60. #query='胺基酸'
  61. #query='營養師'
  62. #query='營養補品'
  63. #query='營養補給'
  64. #query='粉光蔘'
  65. #query='調整體質'
  66. #query='天然漢方'
  67. #query='考生必備'
  68. ##query='維他命'
  69. #query='礦物質'
  70. #query='西洋蔘'
  71. #query='補充體力'
  72. #query='補身'
  73. #query='營養成分'
  74. #query='飲食控制'
  75. query='心焙雞精'
  76. #query='龜記'
  77. #query='鶴茶樓'
  78. #query='初韻'
  79. #query='醫療廢棄物'
  80. #query='飲料控'
  81. cnt=0
  82. #cursor=db.query("SELECT name FROM sqlite_master WHERE type='table' AND name='tmp'")
  83. #for c in cursor:
  84. # cnt+=1
  85. #if cnt>0:
  86. cursor=db.query('select query from ig_tags where query = "'+query+'" ')
  87. cnt=0
  88. for c in cursor:
  89. cnt+=1
  90. if cnt>0:
  91. sys.exit()
  92. #if len(cursor)<=0:
  93. # sys.exit()
  94. posts = search_hashtag(L, query)
  95. #posts = search_hashtag(L, "50嵐")
  96. cursor=db.query('select kw,count(*) as cnt from ig_tags where sid="'+ses_id+'" group by kw order by count(*) desc')
  97. for c in cursor:
  98. print(c['kw'])
  99. print(c['cnt'])