# gtrend_newwire.py

import json
import os
import sys
import time

import dataset
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait
from seleniumwire import webdriver
from seleniumwire.utils import decode
  11. db = dataset.connect('mysql://choozmo:pAssw0rd@db.ptt.cx:3306/gtrend2?charset=utf8mb4')
  12. table=db['topics']
  13. singles={}
  14. cursor=db.query('select distinct sessionid,query from topics ')
  15. for c in cursor:
  16. singles[(c['sessionid'],c['query'])]=1
  17. def init_webdriver():
  18. options = webdriver.ChromeOptions()
  19. options.add_argument('--ignore-certificate-errors')
  20. options.add_argument("--no-sandbox")
  21. # options.add_argument("--headless")
  22. options.add_argument("--disable-gpu")
  23. options.add_argument("--disable-dev-shm-usage")
  24. driver = webdriver.Chrome(
  25. options=options
  26. )
  27. driver.set_window_size(1400,1000)
  28. return driver
  29. def interceptor(request):
  30. global sessionid
  31. global singles
  32. for request in driver.requests:
  33. if 'relatedsearches' in request.url:
  34. if request.response is not None:
  35. rows=[]
  36. # print(request.response.body)
  37. body = decode(request.response.body,'gzip')
  38. bd=body.decode()
  39. # print(body)
  40. bd=bd.replace(r")]}\',\n",'')
  41. bd=bd.replace(r")]}',",'')
  42. bd=bd.encode().decode('unicode-escape')
  43. js=json.loads(bd)
  44. print(js)
  45. rlist=js['default']['rankedList']
  46. for r in rlist:
  47. kws=r['rankedKeyword']
  48. for k in kws:
  49. if k.get('topic') is not None:
  50. if singles.get((sessionid,k['topic']['title'])) is None:
  51. singles[(sessionid,k['topic']['title'])]=1
  52. rows.append({'sessionid':sessionid,'mid':k['topic']['mid'],'query':k['topic']['title'],'type':k['topic']['type'],'value':k['value']})
  53. # table.insert()
  54. print(k['topic'])
  55. print(k['value'])
  56. if k.get('query') is not None:
  57. # table.insert({'query':k['query'],'value':k['value']})
  58. if singles.get((sessionid,k['query'])) is None:
  59. singles[(sessionid,k['query'])]=1
  60. rows.append({'sessionid':sessionid,'query':k['query'],'value':k['value']})
  61. print(k['query'])
  62. print(k['value'])
  63. table.insert_many(rows)
  64. # sys.exit()
  65. # print(bd)
  66. #sessionid='20231014-關鍵字'
  67. sessionid='20231014-HHH'
  68. driver=init_webdriver()
  69. driver.request_interceptor = interceptor
  70. driver.get('https://trends.google.com.tw/')
  71. time.sleep(0.5)
  72. #driver.get('https://trends.google.com.tw/trends/explore?geo=TW&hl=zh-TW')
  73. driver.get('https://trends.google.com.tw/trends/explore?date=now%207-d&geo=TW&hl=zh-TW')
  74. time.sleep(2)
  75. #elmt = driver.find_element(By.XPATH, "//div[@jsname='E470yf']//input[@aria-label='搜尋']")
  76. elmt = driver.find_element(By.XPATH, "//input[@aria-label='新增搜尋字詞']")
  77. print(elmt)
  78. time.sleep(1)
  79. #elmt.send_keys(Keys.ENTER)
  80. elmt.clear()
  81. #ais=['/m/0mkz','/g/11rsc2xsp1']
  82. # 電商'/m/02m96'
  83. elmt.send_keys('/m/0fy6m3')
  84. elmt.send_keys(Keys.ENTER)
  85. time.sleep(5)
  86. #https://trends.google.com.tw/trends/api/widgetdata/relatedsearches?hl=zh-TW&tz=-480&req=%7B%22restriction%22:%7B%22geo%22:%7B%22country%22:%22TW%22%7D,%22time%22:%222023-10-13T06%5C%5C:10%5C%5C:54+2023-10-14T06%5C%5C:10%5C%5C:54%22,%22originalTimeRangeForExploreUrl%22:%22now+1-d%22,%22complexKeywordsRestriction%22:%7B%22keyword%22:%5B%7B%22type%22:%22BROAD%22,%22value%22:%22%E5%8B%95%E7%89%A9%22%7D%5D%7D%7D,%22keywordType%22:%22QUERY%22,%22metric%22:%5B%22TOP%22,%22RISING%22%5D,%22trendinessSettings%22:%7B%22compareTime%22:%222023-10-12T06%5C%5C:10%5C%5C:54+2023-10-13T06%5C%5C:10%5C%5C:54%22%7D,%22requestOptions%22:%7B%22property%22:%22%22,%22backend%22:%22CM%22,%22category%22:0%7D,%22language%22:%22zh%22,%22userCountryCode%22:%22TW%22,%22userConfig%22:%7B%22userType%22:%22USER_TYPE_LEGIT_USER%22%7D%7D&token=APP6_UEAAAAAZSuCbrHsaUiytOcIA80ZR-ChhKV3nwvA
  87. #driver.get('https://trends.google.com.tw/trends/explore?q=%E5%8F%B0%E7%A9%8D%E9%9B%BB%E9%81%8B%E5%8B%95%E6%9C%83&date=now%201-d&geo=TW&hl=zh-TW')
  88. #time.sleep(9999)