# ig_mysql.py (3.2 KB)
import codecs
import json
import os
import pprint
import re
import sys
import time

import dataset
import instaloader
from instaloader import Instaloader, Hashtag
  10. #db = dataset.connect('sqlite:///:memory:)
  11. #db = dataset.connect('sqlite:///c:/tmp/ig.db')
  12. db = dataset.connect('mysql://choozmo:pAssw0rd@db.ptt.cx:3306/seo?charset=utf8mb4')
  13. table=db['ig_tags']
  14. hashtag_regex = re.compile(r"(?:#)(\w(?:(?:\w|(?:\.(?!\.))){0,28}(?:\w))?)")
  15. def proc_tags(stmt):
  16. global ses_id
  17. global query
  18. metadata = dict(
  19. hashtags = hashtag_regex.findall(stmt.lower()),
  20. )
  21. # print(metadata)``
  22. for k,v in metadata.items():
  23. for elmt in v:
  24. print(elmt)
  25. table.insert({'kw':elmt,'sid':ses_id,'query':query})
  26. # print(v)
  27. # print(metadata)
  28. def search_hashtag(session, hashtag):
  29. fw=codecs.open('c:/tmp/tags.txt','w','utf-8')
  30. jsonData = session.context.get_json(path="explore/tags/" + hashtag + "/", params={"__a": 1})
  31. fw.write(json.dumps(jsonData))
  32. fw.close()
  33. top_posts=jsonData['graphql']['hashtag']['edge_hashtag_to_top_posts']
  34. print(top_posts)
  35. maxid=jsonData['graphql']['hashtag']['edge_hashtag_to_media']["page_info"]["end_cursor"]
  36. print(maxid)
  37. print(maxid)
  38. for e in top_posts['edges']:
  39. for e2 in e['node']['edge_media_to_caption']['edges']:
  40. proc_tags(e2['node']['text'])
  41. for i in range(3):
  42. time.sleep(5)
  43. jsonData = session.context.get_json(path="explore/tags/" + hashtag + "/", params={"__a": 1,"max_id":maxid})
  44. top_posts=jsonData['graphql']['hashtag']['edge_hashtag_to_top_posts']
  45. maxid=jsonData['graphql']['hashtag']['edge_hashtag_to_media']["page_info"]["end_cursor"]
  46. print(maxid)
  47. for e in top_posts['edges']:
  48. for e2 in e['node']['edge_media_to_caption']['edges']:
  49. proc_tags(e2['node']['text'])
  50. hasNextPage = True
  51. pageNumber = 1
  52. L = instaloader.Instaloader(user_agent='Mozilla/5.0 (Linux; Android 9; KFONWI Build/PS7326.3183N; wv) AppleWebKit/537.36 (KHTML, like Gecko) Version/4.0 Chrome/106.0.5249.170 Safari/537.36 Instagram 236.0.0.20.109 Android (28/9; 213dpi; 800x1216; Amazon; KFONWI; onyx; mt8168; en_US; 371679860)')
  53. ses_id='198'
  54. #query='補品'
  55. #query='滴雞精'
  56. #query='燉雞湯'
  57. #query='營養品'
  58. #query='胺基酸'
  59. #query='營養師'
  60. #query='營養補品'
  61. #query='營養補給'
  62. #query='粉光蔘'
  63. #query='調整體質'
  64. #query='天然漢方'
  65. #query='考生必備'
  66. ##query='維他命'
  67. #query='礦物質'
  68. #query='西洋蔘'
  69. #query='補充體力'
  70. #query='補身'
  71. #query='營養成分'
  72. #query='飲食控制'
  73. query='心焙雞精'
  74. #query='龜記'
  75. #query='鶴茶樓'
  76. #query='初韻'
  77. #query='醫療廢棄物'
  78. #query='飲料控'
  79. cnt=0
  80. #cursor=db.query("SELECT name FROM sqlite_master WHERE type='table' AND name='tmp'")
  81. #for c in cursor:
  82. # cnt+=1
  83. #if cnt>0:
  84. cursor=db.query('select query from ig_tags where query = "'+query+'" ')
  85. cnt=0
  86. for c in cursor:
  87. cnt+=1
  88. if cnt>0:
  89. sys.exit()
  90. #if len(cursor)<=0:
  91. # sys.exit()
  92. posts = search_hashtag(L, query)
  93. #posts = search_hashtag(L, "50嵐")
  94. cursor=db.query('select kw,count(*) as cnt from ig_tags where sid="'+ses_id+'" group by kw order by count(*) desc')
  95. for c in cursor:
  96. print(c['kw'])
  97. print(c['cnt'])