# Collect related keywords from cached Google Trends JSON (gtrend_jsraw)
# and store each (original keyword, related keyword) pair in kw_related.

import json
import traceback

import dataset

db = dataset.connect('mysql://choozmo:pAssw0rd@db.ptt.cx:3306/gtrends?charset=utf8mb4')
table = db['kw_related']

# All distinct keywords that already have raw Trends JSON stored.
lst = []
cursor = db.query('SELECT DISTINCT kw FROM gtrends.gtrend_jsraw ORDER BY id DESC')
for c in cursor:
    lst.append(c['kw'])

for fullkw in lst:
    alldict = {}
    # Parameter binding avoids the quoting problems of building the WHERE
    # clause by string concatenation.
    cursor = db.query(
        'SELECT * FROM gtrends.gtrend_jsraw WHERE kw = :kw ORDER BY id DESC',
        kw=fullkw,
    )
    for c in cursor:
        jsobj = json.loads(c['json'])
        # jsobj = jsobj['rankedKeyword']
        for j in jsobj:
            # Each entry carries a list of ranked related keywords.
            for kw in j['rankedKeyword']:
                if 'query' in kw:
                    print(kw['query'])
                    alldict[kw['query']] = 1  # dict keys de-duplicate the queries

    # Insert the de-duplicated related keywords; failed inserts (e.g. duplicate
    # rows in kw_related) are reported and skipped.
    for k in alldict:
        try:
            table.insert({'original': fullkw, 'kw': k})
        except Exception:
            print('insert failed for: ' + k)
            traceback.print_exc()

    # if len(alldict) >= 5:
    #     break
    print(alldict)
    # break
    # print(j['title']['query'])
    # for a in j['articles']:
    #     print(a['title'])
    #     if a.get('image') is not None:
    #         print(a['image'])
    #         print(a['image']['imageUrl'])
    #         # print(a['image']['newsUrl'])
    # for r in j['relatedQueries']:
    #     print("-->" + r['query'])