12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849 |
- #import urllib.request
- import urllib
- import requests
- import traceback
- from bs4 import BeautifulSoup
- import json
- import os
- import time
- import sys
- import random
- from seleniumwire import webdriver
- from selenium.webdriver.common.by import By
- from selenium.webdriver.support.ui import WebDriverWait, Select
- from selenium.webdriver.support import expected_conditions as EC
- from selenium.webdriver.common.keys import Keys
- from selenium.webdriver.remote.webdriver import WebDriver
- import dataset
- import docker
- import datetime
- import gzip
# Connection URL for the gtrends database. It can be overridden through the
# GTRENDS_DB_URL environment variable; the default is the value this script
# has always used.
# NOTE(review): the default embeds credentials in source control -- consider
# removing it and requiring the environment variable instead.
DB_URL = os.environ.get(
    'GTRENDS_DB_URL',
    'mysql://choozmo:pAssw0rd@db.ptt.cx:3306/gtrends?charset=utf8mb4',
)


def iter_flat_rows(cid, dt, js):
    """Yield one flat row dict per article found in a trending-search JSON blob.

    Args:
        cid: Value stored under the ``cid`` key of every yielded row.
        dt: Value stored under the ``dt`` key of every yielded row.
        js: JSON text that decodes to a list of entries; each entry is read
            via ``['title']['query']``, ``['articles']`` and
            ``['relatedQueries']``.

    Yields:
        Dicts with the keys ``cid``, ``qtitle``, ``atitle``, ``aimg``,
        ``aurl`` and ``dt``. ``aimg`` and ``aurl`` are ``None`` when the
        article has no ``image`` entry.

    The query title, article titles, image/news URLs and related queries are
    printed as they are visited.
    """
    for trend in json.loads(js):
        qtitle = trend['title']['query']
        print(qtitle)
        for article in trend['articles']:
            atitle = article['title']
            print(atitle)
            # Reset per article so a missing image can neither raise a
            # NameError nor reuse the previous article's values.
            # NOTE(review): the original indentation was lost, so it is
            # unclear whether image-less articles were meant to be skipped;
            # here they are kept with empty image fields -- confirm.
            aimg = aurl = None
            image = article.get('image')
            if image is not None:
                aimg = image['imageUrl']
                aurl = image['newsUrl']
                print(aimg)
                print(aurl)
            yield {
                'cid': cid,
                'qtitle': qtitle,
                'atitle': atitle,
                'aimg': aimg,
                'aurl': aurl,
                'dt': dt,
            }
        for related in trend['relatedQueries']:
            print("-->" + related['query'])


def main():
    """Flatten the 24 highest-id rows of trending_search_json into trending_search_flat."""
    db = dataset.connect(DB_URL)
    table = db['trending_search_flat']
    cursor = db.query('SELECT * FROM gtrends.trending_search_json order by id desc limit 24')
    for c in cursor:
        for row in iter_flat_rows(c['id'], c['dt'], c['json']):
            table.insert(row)


if __name__ == '__main__':
    main()
|