|  | @@ -0,0 +1,197 @@
 | 
	
		
			
				|  |  | +#import urllib.request
 | 
	
		
			
				|  |  | +import urllib
 | 
	
		
			
				|  |  | +import requests
 | 
	
		
			
				|  |  | +import traceback
 | 
	
		
			
				|  |  | +from bs4 import BeautifulSoup
 | 
	
		
			
				|  |  | +import json
 | 
	
		
			
				|  |  | +import os
 | 
	
		
			
				|  |  | +import time
 | 
	
		
			
				|  |  | +import sys
 | 
	
		
			
				|  |  | +import random
 | 
	
		
			
				|  |  | +from seleniumwire import webdriver
 | 
	
		
			
				|  |  | +from selenium.webdriver.common.by import By
 | 
	
		
			
				|  |  | +from selenium.webdriver.support.ui import WebDriverWait, Select
 | 
	
		
			
				|  |  | +from selenium.webdriver.support import expected_conditions as EC
 | 
	
		
			
				|  |  | +from selenium.webdriver.common.keys import Keys
 | 
	
		
			
				|  |  | +from selenium.webdriver.remote.webdriver import WebDriver
 | 
	
		
			
				|  |  | +import dataset
 | 
	
		
			
				|  |  | +import docker
 | 
	
		
			
				|  |  | +import brotli
 | 
	
		
			
				|  |  | +import gzip
 | 
	
		
			
				|  |  | +import datetime
 | 
	
		
			
				|  |  | +import redis
 | 
	
		
			
				|  |  | +import argparse
 | 
	
		
			
				|  |  | +
 | 
	
		
			
				|  |  | +#from fp.fp import FreeProxy
 | 
	
		
			
				|  |  | +
 | 
	
		
			
				|  |  | +localrun=False
 | 
	
		
			
				|  |  | +geo='TW'
 | 
	
		
			
				|  |  | +
 | 
	
		
			
				|  |  | +
 | 
	
		
			
				|  |  | +def send(driver, cmd, params={}):
 | 
	
		
			
				|  |  | +    resource = "/session/%s/chromium/send_command_and_get_result" % driver.session_id
 | 
	
		
			
				|  |  | +    url = driver.command_executor._url + resource
 | 
	
		
			
				|  |  | +    body = json.dumps({'cmd': cmd, 'params': params})
 | 
	
		
			
				|  |  | +    response = driver.command_executor._request('POST', url, body)
 | 
	
		
			
				|  |  | +#    if response['status']:
 | 
	
		
			
				|  |  | +#        raise Exception(response.get('value'))
 | 
	
		
			
				|  |  | +    return response.get('value')
 | 
	
		
			
				|  |  | +
 | 
	
		
			
				|  |  | +def add_script(driver, script):
 | 
	
		
			
				|  |  | +    send(driver, "Page.addScriptToEvaluateOnNewDocument", {"source": script})
 | 
	
		
			
				|  |  | +
 | 
	
		
			
				|  |  | +def set_viewport_size(driver, width, height):
 | 
	
		
			
				|  |  | +    window_size = driver.execute_script("""
 | 
	
		
			
				|  |  | +        return [window.outerWidth - window.innerWidth + arguments[0],
 | 
	
		
			
				|  |  | +          window.outerHeight - window.innerHeight + arguments[1]];
 | 
	
		
			
				|  |  | +        """, width, height)
 | 
	
		
			
				|  |  | +    driver.set_window_size(*window_size)
 | 
	
		
			
				|  |  | +
 | 
	
		
			
				|  |  | +#docker run -d -p 4445:4444  --name p4445  --add-host=host.docker.internal:172.17.0.1  -v /dev/shm:/dev/shm   selenium/standalone-chrome
 | 
	
		
			
				|  |  | +def init_webdriver():
 | 
	
		
			
				|  |  | +    options = webdriver.ChromeOptions()
 | 
	
		
			
				|  |  | +    options.add_argument('--ignore-certificate-errors')
 | 
	
		
			
				|  |  | +    options.add_argument("--no-sandbox")
 | 
	
		
			
				|  |  | +#    options.add_argument("--headless")h
 | 
	
		
			
				|  |  | +    options.add_argument("--disable-gpu")
 | 
	
		
			
				|  |  | +    options.add_argument("--disable-dev-shm-usage")
 | 
	
		
			
				|  |  | +    driver = webdriver.Chrome(
 | 
	
		
			
				|  |  | +        options=options
 | 
	
		
			
				|  |  | +    )
 | 
	
		
			
				|  |  | +    driver.set_window_size(1400,1000)
 | 
	
		
			
				|  |  | +
 | 
	
		
			
				|  |  | +    return driver
 | 
	
		
			
				|  |  | +
 | 
	
		
			
				|  |  | +
 | 
	
		
			
				|  |  | +class SelGTrend:
 | 
	
		
			
				|  |  | +
 | 
	
		
			
				|  |  | +    def __init__(self):
 | 
	
		
			
				|  |  | +        self.texts = []
 | 
	
		
			
				|  |  | +        self.links = []
 | 
	
		
			
				|  |  | +        self.results = []
 | 
	
		
			
				|  |  | +        self.user_agent = 'Mozilla/5.0 (X11; Ubuntu; Linux i686; rv:64.0) Gecko/20100101 Firefox/64.0'
 | 
	
		
			
				|  |  | +        self.headers = {'User-Agent': self.user_agent}
 | 
	
		
			
				|  |  | +
 | 
	
		
			
				|  |  | +    def search(self, key):
 | 
	
		
			
				|  |  | +        self.key = "+".join(key.split(" "))
 | 
	
		
			
				|  |  | +        return self.getpage(self.key)
 | 
	
		
			
				|  |  | +
 | 
	
		
			
				|  |  | +    def getpage(self,query):
 | 
	
		
			
				|  |  | +        global geo
 | 
	
		
			
				|  |  | +        driver=None
 | 
	
		
			
				|  |  | +        result=[]
 | 
	
		
			
				|  |  | +        import urllib.parse
 | 
	
		
			
				|  |  | +        safe_string = urllib.parse.quote_plus(query)
 | 
	
		
			
				|  |  | +        self.url = 'https://trends.google.com/trends/explore?date=now%207-d&geo='+geo+'&q='+safe_string
 | 
	
		
			
				|  |  | +
 | 
	
		
			
				|  |  | +        try:
 | 
	
		
			
				|  |  | +            print(self.url)
 | 
	
		
			
				|  |  | +            driver=init_webdriver()
 | 
	
		
			
				|  |  | +#            driver.add_script('const setProperty = () => {     Object.defineProperty(navigator, "webdriver", {       get: () => false,     }); }; setProperty();')
 | 
	
		
			
				|  |  | +            driver.get(self.url)
 | 
	
		
			
				|  |  | +            time.sleep(3)
 | 
	
		
			
				|  |  | +            driver.refresh()
 | 
	
		
			
				|  |  | +            time.sleep(4)
 | 
	
		
			
				|  |  | +
 | 
	
		
			
				|  |  | +            ub = driver.find_element_by_css_selector('body')
 | 
	
		
			
				|  |  | +            for i in range(9):
 | 
	
		
			
				|  |  | +                ub.send_keys(Keys.PAGE_DOWN)
 | 
	
		
			
				|  |  | +                time.sleep(0.5)
 | 
	
		
			
				|  |  | +#            time.sleep(4)
 | 
	
		
			
				|  |  | +
 | 
	
		
			
				|  |  | +#            driver.save_screenshot("/tmp/screenshot.png")
 | 
	
		
			
				|  |  | +
 | 
	
		
			
				|  |  | +
 | 
	
		
			
				|  |  | +            for request in driver.requests:
 | 
	
		
			
				|  |  | +                print(request.url[0:60])
 | 
	
		
			
				|  |  | +                if request.response:
 | 
	
		
			
				|  |  | +                    if 'relatedsearches?' in request.url :
 | 
	
		
			
				|  |  | +                        print('*** parsing js:')
 | 
	
		
			
				|  |  | +                        resp=request.response.body
 | 
	
		
			
				|  |  | +                        data=None
 | 
	
		
			
				|  |  | +                        try:
 | 
	
		
			
				|  |  | +                            data = gzip.decompress(resp)
 | 
	
		
			
				|  |  | +                        except:
 | 
	
		
			
				|  |  | +                            traceback.print_exc()
 | 
	
		
			
				|  |  | +                            data=resp
 | 
	
		
			
				|  |  | +                        
 | 
	
		
			
				|  |  | +                        jstext=data.decode('utf-8')
 | 
	
		
			
				|  |  | +                        print(jstext)
 | 
	
		
			
				|  |  | +
 | 
	
		
			
				|  |  | +                        jsobj=json.loads(jstext[6:])
 | 
	
		
			
				|  |  | +                        print(jsobj)
 | 
	
		
			
				|  |  | +                        try:
 | 
	
		
			
				|  |  | +                            kws=jsobj['default']['rankedList'][0]['rankedKeyword']
 | 
	
		
			
				|  |  | +                            for kw in kws:
 | 
	
		
			
				|  |  | +                                if kw['hasData']:
 | 
	
		
			
				|  |  | +                                    try:
 | 
	
		
			
				|  |  | +                                        result.append({'kw':query,'query':kw['query'],'value':kw['value'],'expand':0,'geo':geo,'dt':datetime.datetime.now()})
 | 
	
		
			
				|  |  | +#                                        print({'kw':query,'query':kw['query'],'value':kw['value'],'expand':0,'geo':geo,'dt':datetime.datetime.now()})
 | 
	
		
			
				|  |  | +#                                        trend_table.insert({'kw':query,'query':kw['query'],'value':kw['value'],'expand':0,'geo':geo,'dt':datetime.datetime.now()})
 | 
	
		
			
				|  |  | +                                    except:
 | 
	
		
			
				|  |  | +                                        traceback.print_exc()
 | 
	
		
			
				|  |  | +                                    val=int (kw['value'])
 | 
	
		
			
				|  |  | +#                                    if val >=150:
 | 
	
		
			
				|  |  | +#                                        print(kw['query'])
 | 
	
		
			
				|  |  | +#                                        print(kw['value'])
 | 
	
		
			
				|  |  | +
 | 
	
		
			
				|  |  | +
 | 
	
		
			
				|  |  | +                            kws=jsobj['default']['rankedList'][1]['rankedKeyword']
 | 
	
		
			
				|  |  | +                            for kw in kws:
 | 
	
		
			
				|  |  | +                                try:
 | 
	
		
			
				|  |  | +                                    result.append({'kw':query,'query':kw['query'],'value':kw['value'],'expand':0,'geo':geo,'dt':datetime.datetime.now()})
 | 
	
		
			
				|  |  | +
 | 
	
		
			
				|  |  | +#                                    print({'kw':query,'query':kw['query'],'value':kw['value'],'expand':0,'geo':geo,'dt':datetime.datetime.now()})
 | 
	
		
			
				|  |  | +                                except:
 | 
	
		
			
				|  |  | +                                    traceback.print_exc()
 | 
	
		
			
				|  |  | +
 | 
	
		
			
				|  |  | +                                val=int (kw['value'])
 | 
	
		
			
				|  |  | +#                                if val >=150:
 | 
	
		
			
				|  |  | +##                                    print(kw['query'])
 | 
	
		
			
				|  |  | +#                                    print(kw['value'])
 | 
	
		
			
				|  |  | +
 | 
	
		
			
				|  |  | +                        except:
 | 
	
		
			
				|  |  | +                            traceback.print_exc()
 | 
	
		
			
				|  |  | +
 | 
	
		
			
				|  |  | +#                        print(jsobj['default']['rankedList'])
 | 
	
		
			
				|  |  | +#                        resultobj=parsing_js(jstext)
 | 
	
		
			
				|  |  | +#                        print("before",datetime.now())
 | 
	
		
			
				|  |  | +#                        save_js_to_db(resultobj,area_num,keyword)
 | 
	
		
			
				|  |  | +#                        print("after",datetime.now())
 | 
	
		
			
				|  |  | +
 | 
	
		
			
				|  |  | +
 | 
	
		
			
				|  |  | +#            time.sleep(9999)
 | 
	
		
			
				|  |  | +        except Exception as e:
 | 
	
		
			
				|  |  | +            traceback.print_exc()
 | 
	
		
			
				|  |  | +            print(e)
 | 
	
		
			
				|  |  | +            pass
 | 
	
		
			
				|  |  | +        driver.quit()
 | 
	
		
			
				|  |  | +        return result
 | 
	
		
			
				|  |  | +#        driver.quit()
 | 
	
		
			
				|  |  | +
 | 
	
		
			
				|  |  | +    def result(self):
 | 
	
		
			
				|  |  | +        return self.results
 | 
	
		
			
				|  |  | +
 | 
	
		
			
				|  |  | +    def gettext(self):
 | 
	
		
			
				|  |  | +        return self.texts
 | 
	
		
			
				|  |  | +
 | 
	
		
			
				|  |  | +    def getlinks(self):
 | 
	
		
			
				|  |  | +        return self.links
 | 
	
		
			
				|  |  | +
 | 
	
		
			
				|  |  | +    def clear(self):
 | 
	
		
			
				|  |  | +        self.texts = []
 | 
	
		
			
				|  |  | +        self.links = []
 | 
	
		
			
				|  |  | +        self.results = []
 | 
	
		
			
				|  |  | +
 | 
	
		
			
				|  |  | +
 | 
	
		
			
				|  |  | +def save_to_db(json):
 | 
	
		
			
				|  |  | +    db = dataset.connect('mysql://choozmo:pAssw0rd@db.ptt.cx:3306/gtrends?charset=utf8mb4')
 | 
	
		
			
				|  |  | +    table = db['trend_table']
 | 
	
		
			
				|  |  | +    for j in json:
 | 
	
		
			
				|  |  | +        table.insert(j)
 | 
	
		
			
				|  |  | +
 | 
	
		
			
				|  |  | +
 | 
	
		
			
				|  |  | +sgtrend=SelGTrend()
 | 
	
		
			
				|  |  | +data=sgtrend.search('稅')
 | 
	
		
			
				|  |  | +save_to_db(data)
 | 
	
		
			
				|  |  | +print(data)
 |