|
@@ -8,23 +8,41 @@ from seleniumwire.utils import decode
|
|
|
import sys
|
|
|
import json
|
|
|
import dataset
|
|
|
+import os
|
|
|
db = dataset.connect('mysql://choozmo:pAssw0rd@db.ptt.cx:3306/gtrend2?charset=utf8mb4')
|
|
|
+table_logs=db['gtrend_logs']
|
|
|
table=db['topics']
|
|
|
singles={}
|
|
|
-cursor=db.query('select distinct sessionid,query from topics ')
|
|
|
-for c in cursor:
|
|
|
- singles[(c['sessionid'],c['query'])]=1
|
|
|
def init_webdriver():
|
|
|
+# os.system('taskkill /f /im chrome.exe')
|
|
|
+
|
|
|
options = webdriver.ChromeOptions()
|
|
|
+ options.add_argument("--disable-blink-features=AutomationControlled")
|
|
|
options.add_argument('--ignore-certificate-errors')
|
|
|
- options.add_argument("--no-sandbox")
|
|
|
+ options.add_experimental_option("excludeSwitches", ["enable-automation"])
|
|
|
+ options.add_experimental_option("useAutomationExtension", False)
|
|
|
+# options.debugger_address = "127.0.0.1:" + '8888'
|
|
|
+
|
|
|
+# options.add_argument("--no-sandbox")
|
|
|
# options.add_argument("--headless")
|
|
|
+# options.add_argument("--incognito")
|
|
|
+
|
|
|
options.add_argument("--disable-gpu")
|
|
|
options.add_argument("--disable-dev-shm-usage")
|
|
|
+ options.add_argument("user-data-dir=C:\\Users\\jared\\AppData\\Local\\Google\\Chrome\\User Data\\")
|
|
|
+# options.add_argument('--profile-directory=Profile 7')
|
|
|
+# options.add_argument('--profile-directory=Profile 47')
|
|
|
+ options.add_argument('--profile-directory=Default')
|
|
|
+# options.add_argument('--profile-directory=Profile 64')
|
|
|
+
|
|
|
+# options.add_argument('--profile-directory=Profile 101')
|
|
|
+
|
|
|
+
|
|
|
driver = webdriver.Chrome(
|
|
|
options=options
|
|
|
)
|
|
|
driver.set_window_size(1400,1000)
|
|
|
+ driver.execute_script("Object.defineProperty(navigator, 'webdriver', {get: () => undefined})")
|
|
|
|
|
|
return driver
|
|
|
|
|
@@ -36,10 +54,12 @@ def interceptor(request):
|
|
|
if 'relatedsearches' in request.url:
|
|
|
if request.response is not None:
|
|
|
rows=[]
|
|
|
+ if request.response.body is None:
|
|
|
+ continue
|
|
|
# print(request.response.body)
|
|
|
body = decode(request.response.body,'gzip')
|
|
|
bd=body.decode()
|
|
|
- # print(body)
|
|
|
+ print(body)
|
|
|
bd=bd.replace(r")]}\',\n",'')
|
|
|
bd=bd.replace(r")]}',",'')
|
|
|
bd=bd.encode().decode('unicode-escape')
|
|
@@ -69,34 +89,103 @@ def interceptor(request):
|
|
|
# print(bd)
|
|
|
|
|
|
#sessionid='20231014-關鍵字'
|
|
|
-sessionid='20231014-HHH'
|
|
|
+#sessionid='20231018-ChoozMo'
|
|
|
+#sessionid='20231024-AI'
|
|
|
+#sessionid='20231124-HHH'
|
|
|
+#sessionid='20231201-HHH'
|
|
|
+sessionid='20240119-HHH'
|
|
|
+
|
|
|
+cursor=db.query('select distinct sessionid,query from topics ')
|
|
|
+for c in cursor:
|
|
|
+ singles[(c['sessionid'],c['query'])]=1
|
|
|
+
|
|
|
|
|
|
driver=init_webdriver()
|
|
|
driver.request_interceptor = interceptor
|
|
|
|
|
|
-driver.get('https://trends.google.com.tw/')
|
|
|
-time.sleep(0.5)
|
|
|
+#driver.get('https://google.com.tw/')
|
|
|
+
|
|
|
+#driver.get('https://trends.google.com.tw/')
|
|
|
+#time.sleep(9999)
|
|
|
+
|
|
|
#driver.get('https://trends.google.com.tw/trends/explore?geo=TW&hl=zh-TW')
|
|
|
+#driver.get('https://trends.google.com.tw/trends/')
|
|
|
+
|
|
|
+#time.sleep(3)
|
|
|
+
|
|
|
+#elmt = driver.find_element(By.XPATH, "//textarea[@type='search']")
|
|
|
+
|
|
|
+#time.sleep(1)
|
|
|
+#elmt.send_keys('家具')
|
|
|
+#elmt.send_keys(Keys.ENTER)
|
|
|
+#time.sleep(5)
|
|
|
+
|
|
|
driver.get('https://trends.google.com.tw/trends/explore?date=now%207-d&geo=TW&hl=zh-TW')
|
|
|
-time.sleep(2)
|
|
|
-#elmt = driver.find_element(By.XPATH, "//div[@jsname='E470yf']//input[@aria-label='搜尋']")
|
|
|
-elmt = driver.find_element(By.XPATH, "//input[@aria-label='新增搜尋字詞']")
|
|
|
+time.sleep(5)
|
|
|
+#kw_list=['風水','小坪數','老宅','購屋','買房',]
|
|
|
+
|
|
|
+#kw_list=['鍋','洗衣機','冷氣','除濕機','烘碗機','床墊']
|
|
|
+kw_list=['/m/01c979','/g/122rvzch','/g/1q6jh4d9s','/m/0c_jw','/m/0d4wf','/m/0bl2jb','/g/11sr9_h44g','/m/06ht1','/m/03gfsp','/m/06wqb','/g/121kx11r','/m/02cwm','/m/02rfdq','/m/01j2bj','/g/11sr9_mdk7']
|
|
|
+#%2Fm%2F01748f
|
|
|
+#%2Fm%2F02vkqh8
|
|
|
+#'室內裝修'
|
|
|
+#%2Fm%2F02z51p
|
|
|
+#%2Fm%2F0m8q5
|
|
|
+#%2Fm%2F04vct9
|
|
|
+#kw_list=['建材']
|
|
|
+#kw_list=['/m/0mkz']
|
|
|
+#kw_list=['nvidia']
|
|
|
+
|
|
|
+#kw_list=['沙發']
|
|
|
+#房價
|
|
|
+#kw_list=['系統櫃']
|
|
|
+
|
|
|
+
|
|
|
+for kw in kw_list:
|
|
|
+ try:
|
|
|
+ table_logs.insert({'kw':kw,'sessionid':sessionid})
|
|
|
+ except:
|
|
|
+ print('dup')
|
|
|
+ print(kw)
|
|
|
+ #elmt = driver.find_element(By.XPATH, "//div[@jsname='E470yf']//input[@aria-label='搜尋']")
|
|
|
+ elmt = driver.find_element(By.XPATH, "//input[@aria-label='新增搜尋字詞']")
|
|
|
+ elmt.clear()
|
|
|
+ for i in range(20):
|
|
|
+ elmt.send_keys(Keys.BACK_SPACE)
|
|
|
+ elmt.send_keys(kw)
|
|
|
+
|
|
|
+ elmt.send_keys(Keys.ENTER)
|
|
|
+ time.sleep(11)
|
|
|
+time.sleep(9999)
|
|
|
+
|
|
|
+#kw_list=['/g/11j7ys83vr','/g/1yqccwk9n']
|
|
|
+#,'/m/019dx1']
|
|
|
+#kw_list=['/m/01c979','/g/122rvzch','/g/1q6jh4d9s']
|
|
|
+#kw_list=['/m/0c_jw','/m/0d4wf','/m/0bl2jb']
|
|
|
+
|
|
|
+#kw_list=['/g/11sr9_h44g','/m/06ht1','/m/03gfsp']
|
|
|
+#kw_list=['/m/06wqb','/g/121kx11r','/m/02cwm']
|
|
|
+
|
|
|
+#kw_list=['/m/02rfdq','/m/01j2bj','/g/11sr9_mdk7']
|
|
|
|
|
|
print(elmt)
|
|
|
time.sleep(1)
|
|
|
|
|
|
#elmt.send_keys(Keys.ENTER)
|
|
|
-elmt.clear()
|
|
|
+#elmt.clear()
|
|
|
#ais=['/m/0mkz','/g/11rsc2xsp1']
|
|
|
# 電商'/m/02m96'
|
|
|
|
|
|
-elmt.send_keys('/m/0fy6m3')
|
|
|
-elmt.send_keys(Keys.ENTER)
|
|
|
+#elmt.send_keys('/m/0fy6m3')
|
|
|
+#elmt.send_keys('/m/077mq')
|
|
|
+
|
|
|
+#elmt.send_keys(Keys.ENTER)
|
|
|
|
|
|
|
|
|
|
|
|
-time.sleep(5)
|
|
|
|
|
|
+time.sleep(5)
|
|
|
+#time.sleep(9999)
|
|
|
|
|
|
#https://trends.google.com.tw/trends/api/widgetdata/relatedsearches?hl=zh-TW&tz=-480&req=%7B%22restriction%22:%7B%22geo%22:%7B%22country%22:%22TW%22%7D,%22time%22:%222023-10-13T06%5C%5C:10%5C%5C:54+2023-10-14T06%5C%5C:10%5C%5C:54%22,%22originalTimeRangeForExploreUrl%22:%22now+1-d%22,%22complexKeywordsRestriction%22:%7B%22keyword%22:%5B%7B%22type%22:%22BROAD%22,%22value%22:%22%E5%8B%95%E7%89%A9%22%7D%5D%7D%7D,%22keywordType%22:%22QUERY%22,%22metric%22:%5B%22TOP%22,%22RISING%22%5D,%22trendinessSettings%22:%7B%22compareTime%22:%222023-10-12T06%5C%5C:10%5C%5C:54+2023-10-13T06%5C%5C:10%5C%5C:54%22%7D,%22requestOptions%22:%7B%22property%22:%22%22,%22backend%22:%22CM%22,%22category%22:0%7D,%22language%22:%22zh%22,%22userCountryCode%22:%22TW%22,%22userConfig%22:%7B%22userType%22:%22USER_TYPE_LEGIT_USER%22%7D%7D&token=APP6_UEAAAAAZSuCbrHsaUiytOcIA80ZR-ChhKV3nwvA
|
|
|
#driver.get('https://trends.google.com.tw/trends/explore?q=%E5%8F%B0%E7%A9%8D%E9%9B%BB%E9%81%8B%E5%8B%95%E6%9C%83&date=now%201-d&geo=TW&hl=zh-TW')
|