1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768 |
- #import urllib.request
- import urllib
- import requests
- import traceback
- from bs4 import BeautifulSoup
- import json
- import os
- import time
- import sys
- import random
- from seleniumwire import webdriver
- from selenium.webdriver.common.by import By
- from selenium.webdriver.support.ui import WebDriverWait, Select
- from selenium.webdriver.support import expected_conditions as EC
- from selenium.webdriver.common.keys import Keys
- from selenium.webdriver.remote.webdriver import WebDriver
- import dataset
- import docker
- import datetime
- import gzip
# Collect the related search queries stored for every keyword in
# gtrends.gtrend_jsraw and write them to the kw_related table as
# (original keyword, related keyword) pairs.
#
# NOTE(review): the original indentation of this script was lost; the nesting
# below assumes the insert loop runs once per source keyword, after all rows
# for that keyword have been scanned -- confirm against the intended behavior.


def _ranked_queries(raw_json):
    """Yield each 'query' value found in one stored JSON payload.

    The payload is parsed with ``json.loads`` and iterated; every element
    must have a 'rankedKeyword' list. Entries of that list without a
    'query' key are skipped.

    Raises:
        KeyError: if an element of the payload has no 'rankedKeyword' key.
        ValueError: if ``raw_json`` is not valid JSON.
    """
    for group in json.loads(raw_json):
        for entry in group['rankedKeyword']:
            if 'query' in entry:
                yield entry['query']


# NOTE(review): the connection URL embeds credentials in source control;
# consider loading it from the environment or a secrets store.
db = dataset.connect('mysql://choozmo:pAssw0rd@db.ptt.cx:3306/gtrends?charset=utf8mb4')
table = db['kw_related']

# Materialize the keyword list first so the result set is fully consumed
# before further queries are issued on the same connection.
keywords = [
    row['kw']
    for row in db.query('SELECT distinct kw FROM gtrends.gtrend_jsraw order by id desc')
]

for fullkw in keywords:
    # Dict keys de-duplicate the queries while keeping first-seen order.
    alldict = {}

    # Bound parameter instead of string concatenation: keywords containing
    # quotes no longer break the statement or allow SQL injection.
    rows = db.query(
        'SELECT * FROM gtrends.gtrend_jsraw where kw = :kw order by id desc',
        kw=fullkw,
    )
    for row in rows:
        for query in _ranked_queries(row['json']):
            print(query)
            alldict[query] = 1

    for related in alldict:
        try:
            table.insert({'original': fullkw, 'kw': related})
        except Exception as exc:
            # Best-effort insert: report the failing keyword and the cause,
            # then carry on with the remaining ones.
            print('except')
            print(related)
            print(exc)

    print(alldict)
|