"""Collect Google Trends "related searches" for a list of topics into MySQL.

The script opens Chrome through selenium-wire, loads the Trends explore page,
types each entry of ``kw_list`` into the search box, and harvests every
captured ``relatedsearches`` API response into the ``topics`` table.  Rows are
de-duplicated per ``(sessionid, query)`` pair using the ``singles`` dict, which
is pre-seeded from the rows already present in the database.
"""
import json
import os
import sys
import time

import dataset
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait
from seleniumwire import webdriver
from seleniumwire.utils import decode

# SECURITY(review): the connection string (including the password) is embedded
# in source.  Set GTREND_DB_URL to override it; the literal is kept only as a
# fallback so existing runs behave exactly as before.
DEFAULT_DB_URL = 'mysql://choozmo:pAssw0rd@db.ptt.cx:3306/gtrend2?charset=utf8mb4'

db = dataset.connect(os.environ.get('GTREND_DB_URL', DEFAULT_DB_URL))
table_logs = db['gtrend_logs']
table = db['topics']

# Maps (sessionid, query-or-topic-title) -> 1 for every pair already stored.
singles = {}

# Anti-XSSI guard that prefixes the API's JSON responses.
_XSSI_PREFIX = ")]}'"


def init_webdriver():
    """Start Chrome via selenium-wire with the local default user profile.

    Returns:
        The configured ``webdriver.Chrome`` instance, sized to 1400x1000 and
        with ``navigator.webdriver`` redefined to return ``undefined``.
    """
    # os.system('taskkill /f /im chrome.exe')
    options = webdriver.ChromeOptions()
    options.add_argument("--disable-blink-features=AutomationControlled")
    options.add_argument('--ignore-certificate-errors')
    options.add_experimental_option("excludeSwitches", ["enable-automation"])
    options.add_experimental_option("useAutomationExtension", False)
    # options.debugger_address = "127.0.0.1:" + '8888'
    # options.add_argument("--no-sandbox")
    # options.add_argument("--headless")
    # options.add_argument("--incognito")
    options.add_argument("--disable-gpu")
    options.add_argument("--disable-dev-shm-usage")
    options.add_argument("user-data-dir=C:\\Users\\jared\\AppData\\Local\\Google\\Chrome\\User Data\\")
    # options.add_argument('--profile-directory=Profile 7')
    # options.add_argument('--profile-directory=Profile 47')
    options.add_argument('--profile-directory=Default')
    # options.add_argument('--profile-directory=Profile 64')
    # options.add_argument('--profile-directory=Profile 101')
    driver = webdriver.Chrome(options=options)
    driver.set_window_size(1400, 1000)
    driver.execute_script(
        "Object.defineProperty(navigator, 'webdriver', {get: () => undefined})"
    )
    return driver


def _parse_related_searches(response):
    """Decode a captured ``relatedsearches`` response and return its JSON.

    Args:
        response: selenium-wire response object with ``headers`` and ``body``.

    Returns:
        The parsed JSON payload.

    Raises:
        ValueError: if the body cannot be decoded or is not valid JSON.
    """
    # Honour the actual transfer encoding rather than assuming gzip.
    encoding = response.headers.get('Content-Encoding', 'identity')
    raw = decode(response.body, encoding)
    print(raw)
    text = raw.decode().lstrip()
    # Strip the guard only where it really is: at the start of the body.
    if text.startswith(_XSSI_PREFIX):
        text = text[len(_XSSI_PREFIX):].lstrip(',')
    # json.loads resolves \uXXXX escapes itself; a 'unicode-escape' round trip
    # would corrupt raw UTF-8 text and break escaped quotes.
    return json.loads(text)


def _collect_new_rows(payload):
    """Build ``topics`` rows for keywords not yet stored for this session.

    Reads the module-level ``sessionid`` and records every new
    ``(sessionid, name)`` pair in ``singles`` as a side effect.

    Args:
        payload: parsed response; ``payload['default']['rankedList']`` is
            iterated, and each item's ``'rankedKeyword'`` list is scanned.

    Returns:
        A list of row dicts ready for ``table.insert_many``.
    """
    rows = []
    for ranked in payload['default']['rankedList']:
        for kw in ranked['rankedKeyword']:
            topic = kw.get('topic')
            if topic is not None:
                key = (sessionid, topic['title'])
                if key not in singles:
                    singles[key] = 1
                    rows.append({
                        'sessionid': sessionid,
                        'mid': topic['mid'],
                        'query': topic['title'],
                        'type': topic['type'],
                        'value': kw['value'],
                    })
                print(topic)
                print(kw['value'])
            query = kw.get('query')
            if query is not None:
                key = (sessionid, query)
                if key not in singles:
                    singles[key] = 1
                    rows.append({
                        'sessionid': sessionid,
                        'query': query,
                        'value': kw['value'],
                    })
                print(query)
                print(kw['value'])
    return rows


def interceptor(request):
    """Harvest every captured ``relatedsearches`` response into ``topics``.

    Installed as ``driver.request_interceptor``, so it runs on each outgoing
    request.  The triggering ``request`` itself is not inspected; the call is
    only used as a cue to re-scan ``driver.requests`` for responses that have
    arrived in the meantime.  Previously stored keywords are skipped through
    ``singles``, so re-scanning the same response adds no duplicate rows.

    Args:
        request: the outgoing request that triggered this call (unused).
    """
    for captured in driver.requests:
        if 'relatedsearches' not in captured.url:
            continue
        response = captured.response
        # No response yet, or nothing to decode.
        if response is None or not response.body:
            continue
        try:
            payload = _parse_related_searches(response)
        except ValueError as exc:
            # A bad body stays in driver.requests; raising here would make
            # every later interceptor call fail on it again.
            print('skipping unparsable relatedsearches response:', exc)
            continue
        print(payload)
        rows = _collect_new_rows(payload)
        if rows:
            table.insert_many(rows)
        # sys.exit()


# Earlier session identifiers, kept for reference:
#sessionid='20231014-關鍵字'
#sessionid='20231018-ChoozMo'
#sessionid='20231024-AI'
#sessionid='20231124-HHH'
#sessionid='20231201-HHH'
sessionid = '20240119-HHH'

# Seed the de-duplication map with everything already stored.
cursor = db.query('select distinct sessionid,query from topics ')
for c in cursor:
    singles[(c['sessionid'], c['query'])] = 1

driver = init_webdriver()
driver.request_interceptor = interceptor
#driver.get('https://google.com.tw/')
#driver.get('https://trends.google.com.tw/')
#time.sleep(9999)
#driver.get('https://trends.google.com.tw/trends/explore?geo=TW&hl=zh-TW')
#driver.get('https://trends.google.com.tw/trends/')
#time.sleep(3)
#elmt = driver.find_element(By.XPATH, "//textarea[@type='search']")
#time.sleep(1)
#elmt.send_keys('家具')
#elmt.send_keys(Keys.ENTER)
#time.sleep(5)

driver.get('https://trends.google.com.tw/trends/explore?date=now%207-d&geo=TW&hl=zh-TW')
time.sleep(5)

# Alternative keyword lists used in earlier runs:
#kw_list=['風水','小坪數','老宅','購屋','買房',]
#kw_list=['鍋','洗衣機','冷氣','除濕機','烘碗機','床墊']
kw_list = [
    '/m/01c979', '/g/122rvzch', '/g/1q6jh4d9s', '/m/0c_jw', '/m/0d4wf',
    '/m/0bl2jb', '/g/11sr9_h44g', '/m/06ht1', '/m/03gfsp', '/m/06wqb',
    '/g/121kx11r', '/m/02cwm', '/m/02rfdq', '/m/01j2bj', '/g/11sr9_mdk7',
]
#%2Fm%2F01748f
#%2Fm%2F02vkqh8
#'室內裝修'  (interior renovation)
#%2Fm%2F02z51p
#%2Fm%2F0m8q5
#%2Fm%2F04vct9
#kw_list=['建材']
#kw_list=['/m/0mkz']
#kw_list=['nvidia']
#kw_list=['沙發']
#房價  (housing prices)
#kw_list=['系統櫃']

for kw in kw_list:
    try:
        table_logs.insert({'kw': kw, 'sessionid': sessionid})
    except Exception as exc:
        # Presumably a duplicate-key violation, hence 'dup' — TODO confirm.
        # Narrowed from a bare except so Ctrl-C still stops the run.
        print('dup', exc)
    print(kw)
    #elmt = driver.find_element(By.XPATH, "//div[@jsname='E470yf']//input[@aria-label='搜尋']")
    elmt = driver.find_element(By.XPATH, "//input[@aria-label='新增搜尋字詞']")
    elmt.clear()
    # clear() is followed by explicit backspaces to wipe any leftover text.
    for _ in range(20):
        elmt.send_keys(Keys.BACK_SPACE)
    elmt.send_keys(kw)
    elmt.send_keys(Keys.ENTER)
    # Give the page time to issue (and the interceptor to see) the API calls.
    time.sleep(11)

# Keep the browser open so late responses can still be captured.
time.sleep(9999)

#kw_list=['/g/11j7ys83vr','/g/1yqccwk9n']
#,'/m/019dx1']
#kw_list=['/m/01c979','/g/122rvzch','/g/1q6jh4d9s']
#kw_list=['/m/0c_jw','/m/0d4wf','/m/0bl2jb']
#kw_list=['/g/11sr9_h44g','/m/06ht1','/m/03gfsp']
#kw_list=['/m/06wqb','/g/121kx11r','/m/02cwm']
#kw_list=['/m/02rfdq','/m/01j2bj','/g/11sr9_mdk7']
print(elmt)
time.sleep(1)
#elmt.send_keys(Keys.ENTER)
#elmt.clear()
#ais=['/m/0mkz','/g/11rsc2xsp1']
# e-commerce (電商): '/m/02m96'
#elmt.send_keys('/m/0fy6m3')
#elmt.send_keys('/m/077mq')
#elmt.send_keys(Keys.ENTER)
time.sleep(5)
#time.sleep(9999)
#https://trends.google.com.tw/trends/api/widgetdata/relatedsearches?hl=zh-TW&tz=-480&req=%7B%22restriction%22:%7B%22geo%22:%7B%22country%22:%22TW%22%7D,%22time%22:%222023-10-13T06%5C%5C:10%5C%5C:54+2023-10-14T06%5C%5C:10%5C%5C:54%22,%22originalTimeRangeForExploreUrl%22:%22now+1-d%22,%22complexKeywordsRestriction%22:%7B%22keyword%22:%5B%7B%22type%22:%22BROAD%22,%22value%22:%22%E5%8B%95%E7%89%A9%22%7D%5D%7D%7D,%22keywordType%22:%22QUERY%22,%22metric%22:%5B%22TOP%22,%22RISING%22%5D,%22trendinessSettings%22:%7B%22compareTime%22:%222023-10-12T06%5C%5C:10%5C%5C:54+2023-10-13T06%5C%5C:10%5C%5C:54%22%7D,%22requestOptions%22:%7B%22property%22:%22%22,%22backend%22:%22CM%22,%22category%22:0%7D,%22language%22:%22zh%22,%22userCountryCode%22:%22TW%22,%22userConfig%22:%7B%22userType%22:%22USER_TYPE_LEGIT_USER%22%7D%7D&token=APP6_UEAAAAAZSuCbrHsaUiytOcIA80ZR-ChhKV3nwvA
#driver.get('https://trends.google.com.tw/trends/explore?q=%E5%8F%B0%E7%A9%8D%E9%9B%BB%E9%81%8B%E5%8B%95%E6%9C%83&date=now%201-d&geo=TW&hl=zh-TW')
#time.sleep(9999)