Преглед изворни кода

Merge remote-tracking branch 'origin/master'

zooeytsai пре 2 година
родитељ
комит
ccf321e560

+ 134 - 18
choozmo/watch_yt.py

@@ -18,28 +18,35 @@ import time
 import traceback
 import sys
 import fire
-def init_browser():
-    os.system('docker container restart proxy1')
-    os.system('docker container restart tiny1')
+from userAgentRandomizer import userAgents
 
-    time.sleep(10)
+
+
+
+def init_browser(proxy1='proxy1',tiny1='tiny1',socks5="9050",debug='9922',cdriver=''):
+    os.system('docker container restart '+proxy1)
+    os.system('docker container restart '+tiny1)
+    ua = userAgents()
+    user_agent = ua.random()        
+
+    time.sleep(12)
     options = webdriver.ChromeOptions()
     options.add_argument('--headless')
-    options.add_argument('--proxy-server=socks5://127.0.0.1:9050')
+#    options.add_argument('--proxy-server=socks5://127.0.0.1:'+socks5)
     options.add_argument('--disable-dev-shm-usage')
     options.add_argument('--no-sandbox')
-#    options.add_argument('--remote-debugging-port=9222')
+    options.add_argument("--user-agent=" +user_agent)
 
-#    epath=os.environ['WEBDRIVER']
-
-    options.add_experimental_option("debuggerAddress", "127.0.0.1:9922")
+    options.add_experimental_option("debuggerAddress", "127.0.0.1:"+debug)
 
 #    options.add_experimental_option("debuggerAddress", "192.168.192.45:9922")
 #    options.add_experimental_option("debuggerAddress", q[2])
 
 #    options.add_argument("--user-agent=" +user_agent)
     options.add_argument("--incognito")
-    driver = webdriver.Chrome(executable_path=r'C:\portable\webdriver\chrome98\chromedriver.exe',options=options)
+#    driver = webdriver.Chrome(executable_path=r'C:\portable\webdriver\chrome98\chromedriver.exe',options=options)
+    driver = webdriver.Chrome(executable_path=cdriver,options=options)
+
 #    driver = webdriver.Chrome(executable_path=epath,options=options)
 
     driver.delete_all_cookies()
@@ -48,14 +55,123 @@ def init_browser():
     print(q)
     process_query(q)
 
-driver=init_browser()
-#driver.get('https://www.youtube.com/watch?v=K5DEJXajtqA')
-driver.get('https://www.youtube.com/watch?v=_N0VneVhkoQ')
+# Load the candidate video ids once at import time; JParams.get appends one of
+# these values to 'https://www.youtube.com/watch?v=' when asked for 'SBIR'.
+db = dataset.connect('mysql://choozmo:pAssw0rd@db.ptt.cx:3306/seo?charset=utf8mb4')
+yt_lst=[]
+cursor=db.query('select * from sbir_yt')
+for c in cursor:
+    # The 'url' column is used as the video id part of the watch URL.
+    yt_lst.append(c['url'])
+
+class JParams(object):
+  """Command-line entry points exposed through ``fire.Fire``."""
+
+  def get(self, yt,proxy1,tiny1,socks5,debug,cdriver):
+    """Open a YouTube video several times in one browser session, then exit.
+
+    Args:
+        yt: Video id to watch.  When it contains ``'SBIR'`` a random id is
+            drawn from the module-level ``yt_lst`` instead.
+        proxy1: Name of the proxy docker container passed to ``init_browser``.
+        tiny1: Name of the second docker container passed to ``init_browser``.
+        socks5: SOCKS5 port passed to ``init_browser``.
+        debug: Chrome remote-debugging port passed to ``init_browser``.
+        cdriver: Path to the chromedriver executable.
+
+    The process always terminates through ``sys.exit()``; the browser is
+    closed first, whatever happens in between.
+    """
+    # python-fire converts numeric-looking flag values to int; the values are
+    # concatenated into strings further down, so normalise them here.
+    yt=str(yt)
+    if 'SBIR' in yt:
+        yt=random.choice(yt_lst)
+    # Build the URL for both branches (it used to be set only for 'SBIR').
+    url='https://www.youtube.com/watch?v='+yt
+    print(url)
+    driver=init_browser(proxy1=proxy1,tiny1=tiny1,socks5=str(socks5),debug=str(debug),cdriver=cdriver)
+
+    rounds=6
+    try:
+        for i in range(rounds):
+            try:
+                driver.get(url)
+            except Exception:
+                # Page load failed: report it and stop; ``finally`` closes the browser.
+                traceback.print_exc()
+                sys.exit()
+
+            video = driver.find_element_by_id('movie_player')
+            video.send_keys(Keys.SPACE) #hits space
+            time.sleep(1)
+            video.click()               #mouse click
+
+            # Every round except the last also opens the video in a new tab
+            # and scrolls the page before reloading.
+            if i < rounds-1:
+                driver.execute_script('window.open("'+url+'","_blank");')
+                driver.execute_script("window.scrollTo(0, window.scrollY + 400)")
+                time.sleep(3)
+
+        time.sleep(35)
+    finally:
+        driver.quit()
+    sys.exit()
+
+
+
 
-video = driver.find_element_by_id('movie_player')
-video.send_keys(Keys.SPACE) #hits space
-time.sleep(1)
-video.click()               #mouse click
+# Expose the JParams methods as command-line sub-commands through python-fire,
+# e.g. "python3 watch_yt.py get --yt=... --cdriver=...".
+if __name__ == '__main__':
+  fire.Fire(JParams)
 
-time.sleep(31)
 

+ 7 - 0
choozmo/yt1.sh

@@ -0,0 +1,7 @@
+#!/bin/bash
+# Re-run the watcher forever with a random SBIR video (--yt=SBIR),
+# pausing 100 seconds between runs.
+while :
+do
+    python3 watch_yt.py get --cdriver=/root/webdriver/98/chromedriver --yt=SBIR --proxy1=proxy1 --tiny1=tiny1 --socks5=9050 --debug=9922
+	sleep 100
+done
+

+ 7 - 0
choozmo/yt2.sh

@@ -0,0 +1,7 @@
+#!/bin/bash
+# Re-run the watcher forever for one fixed video id, pausing 1 second between runs.
+while :
+do
+    python3 watch_yt.py get --cdriver=/root/webdriver/98/chromedriver --yt=YAt1PdQTp4Q --proxy1=proxy2 --tiny1=tiny2 --socks5=9052 --debug=9925
+	sleep 1
+done
+

+ 8 - 0
choozmo/yt3.sh

@@ -0,0 +1,8 @@
+#!/bin/bash
+# Re-run the watcher forever for one fixed video id, pausing 1 second between runs.
+# NOTE(review): --proxy1=proxy2 does not follow the "3" suffix used by the other
+# flags here (tiny3, 9053, 9926) -- confirm whether proxy3 was intended.
+while :
+do
+    python3 watch_yt.py get --cdriver=/root/webdriver/98/chromedriver --yt=YAt1PdQTp4Q --proxy1=proxy2 --tiny1=tiny3 --socks5=9053 --debug=9926
+
+	sleep 1
+done
+

+ 7 - 0
choozmo/yt4.sh

@@ -0,0 +1,7 @@
+#!/bin/bash
+# Re-run the watcher forever for one fixed video id, pausing 1 second between runs.
+while :
+do
+    python3 watch_yt.py get --cdriver=/root/webdriver/98/chromedriver --yt=YAt1PdQTp4Q --proxy1=proxy4 --tiny1=tiny4 --socks5=9054 --debug=9927
+	sleep 1
+done
+

+ 197 - 0
gtrends/gtrendtest.py

@@ -0,0 +1,197 @@
+#import urllib.request
+import urllib
+import requests
+import traceback
+from bs4 import BeautifulSoup
+import json
+import os
+import time
+import sys
+import random
+from seleniumwire import webdriver
+from selenium.webdriver.common.by import By
+from selenium.webdriver.support.ui import WebDriverWait, Select
+from selenium.webdriver.support import expected_conditions as EC
+from selenium.webdriver.common.keys import Keys
+from selenium.webdriver.remote.webdriver import WebDriver
+import dataset
+import docker
+import brotli
+import gzip
+import datetime
+import redis
+import argparse
+
+#from fp.fp import FreeProxy
+
+localrun=False
+geo='TW'
+
+
+def send(driver, cmd, params=None):
+    """POST a raw DevTools command to the driver's chromium endpoint.
+
+    Args:
+        driver: WebDriver whose ``session_id`` and ``command_executor`` are used.
+        cmd: Command name, e.g. ``"Page.addScriptToEvaluateOnNewDocument"``.
+        params: Optional dict of command parameters; defaults to an empty dict.
+
+    Returns:
+        The ``'value'`` entry of the response (``None`` when absent).
+    """
+    # A fresh dict per call avoids sharing one mutable default between calls.
+    if params is None:
+        params = {}
+    resource = "/session/%s/chromium/send_command_and_get_result" % driver.session_id
+    url = driver.command_executor._url + resource
+    body = json.dumps({'cmd': cmd, 'params': params})
+    response = driver.command_executor._request('POST', url, body)
+    return response.get('value')
+
+def add_script(driver, script):
+    """Ask the browser to evaluate *script* on every new document."""
+    payload = {"source": script}
+    send(driver, "Page.addScriptToEvaluateOnNewDocument", payload)
+
+def set_viewport_size(driver, width, height):
+    """Resize the window so the inner viewport measures *width* x *height*."""
+    # The script adds the browser chrome (outer minus inner size) to the
+    # requested viewport dimensions and returns the resulting window size.
+    outer_width, outer_height = driver.execute_script("""
+        return [window.outerWidth - window.innerWidth + arguments[0],
+          window.outerHeight - window.innerHeight + arguments[1]];
+        """, width, height)
+    driver.set_window_size(outer_width, outer_height)
+
+#docker run -d -p 4445:4444  --name p4445  --add-host=host.docker.internal:172.17.0.1  -v /dev/shm:/dev/shm   selenium/standalone-chrome
+def init_webdriver():
+    """Create a Chrome driver with a 1400x1000 window and return it."""
+    # Headless mode is not enabled in this variant.
+    chrome_flags = (
+        '--ignore-certificate-errors',
+        "--no-sandbox",
+        "--disable-gpu",
+        "--disable-dev-shm-usage",
+    )
+    options = webdriver.ChromeOptions()
+    for flag in chrome_flags:
+        options.add_argument(flag)
+
+    driver = webdriver.Chrome(options=options)
+    driver.set_window_size(1400,1000)
+    return driver
+
+
+class SelGTrend:
+    """Collect related-search keywords from a Google Trends explore page."""
+
+    def __init__(self):
+        self.texts = []
+        self.links = []
+        self.results = []
+        self.user_agent = 'Mozilla/5.0 (X11; Ubuntu; Linux i686; rv:64.0) Gecko/20100101 Firefox/64.0'
+        self.headers = {'User-Agent': self.user_agent}
+
+    def search(self, key):
+        """Join the words of *key* with '+' and return ``getpage`` for it."""
+        self.key = "+".join(key.split(" "))
+        return self.getpage(self.key)
+
+    def getpage(self,query):
+        """Load the explore page for *query* and parse its related searches.
+
+        Returns:
+            A list of dicts with keys ``kw``, ``query``, ``value``, ``expand``,
+            ``geo`` and ``dt``; empty when nothing could be parsed.
+        """
+        global geo
+        driver=None
+        result=[]
+        import urllib.parse
+        safe_string = urllib.parse.quote_plus(query)
+        self.url = 'https://trends.google.com/trends/explore?date=now%207-d&geo='+geo+'&q='+safe_string
+
+        try:
+            print(self.url)
+            driver=init_webdriver()
+            driver.get(self.url)
+            time.sleep(3)
+            driver.refresh()
+            time.sleep(4)
+
+            # Page down a few times before inspecting the captured requests.
+            ub = driver.find_element_by_css_selector('body')
+            for i in range(9):
+                ub.send_keys(Keys.PAGE_DOWN)
+                time.sleep(0.5)
+
+            for request in driver.requests:
+                print(request.url[0:60])
+                if not request.response or 'relatedsearches?' not in request.url:
+                    continue
+                print('*** parsing js:')
+                resp=request.response.body
+                try:
+                    data = gzip.decompress(resp)
+                except Exception:
+                    # Not gzip-compressed (or corrupt): fall back to the raw body.
+                    traceback.print_exc()
+                    data=resp
+
+                jstext=data.decode('utf-8')
+                print(jstext)
+
+                # The first 6 characters are skipped before JSON parsing
+                # (presumably a non-JSON response prefix -- confirm).
+                jsobj=json.loads(jstext[6:])
+                print(jsobj)
+                try:
+                    ranked=jsobj['default']['rankedList']
+                    for idx in (0, 1):
+                        for kw in ranked[idx]['rankedKeyword']:
+                            # Only the first list is filtered on 'hasData'.
+                            if idx == 0 and not kw['hasData']:
+                                continue
+                            try:
+                                result.append({'kw':query,'query':kw['query'],'value':kw['value'],'expand':0,'geo':geo,'dt':datetime.datetime.now()})
+                            except Exception:
+                                traceback.print_exc()
+                except Exception:
+                    traceback.print_exc()
+        except Exception as e:
+            traceback.print_exc()
+            print(e)
+        finally:
+            # driver stays None when init_webdriver() itself failed.
+            if driver is not None:
+                driver.quit()
+        return result
+
+    def result(self):
+        return self.results
+
+    def gettext(self):
+        return self.texts
+
+    def getlinks(self):
+        return self.links
+
+    def clear(self):
+        """Reset the accumulated texts, links and results."""
+        self.texts = []
+        self.links = []
+        self.results = []
+
+
+def save_to_db(json):
+    """Insert each element of *json* as one row of ``trend_table``."""
+    connection = dataset.connect('mysql://choozmo:pAssw0rd@db.ptt.cx:3306/gtrends?charset=utf8mb4')
+    trend_table = connection['trend_table']
+    for row in json:
+        trend_table.insert(row)
+
+
+# Runs on import/execution: scrape related searches for one keyword, store
+# the rows with save_to_db, then print them.
+sgtrend=SelGTrend()
+data=sgtrend.search('稅')
+save_to_db(data)
+print(data)

+ 158 - 0
gtrends/gtrendtest_jsraw.py

@@ -0,0 +1,158 @@
+#import urllib.request
+import urllib
+import requests
+import traceback
+from bs4 import BeautifulSoup
+import json
+import os
+import time
+import sys
+import random
+from seleniumwire import webdriver
+from selenium.webdriver.common.by import By
+from selenium.webdriver.support.ui import WebDriverWait, Select
+from selenium.webdriver.support import expected_conditions as EC
+from selenium.webdriver.common.keys import Keys
+from selenium.webdriver.remote.webdriver import WebDriver
+import dataset
+import docker
+import brotli
+import gzip
+import datetime
+import redis
+import argparse
+
+#from fp.fp import FreeProxy
+
+localrun=False
+geo='TW'
+
+
+def send(driver, cmd, params=None):
+    """POST a raw DevTools command to the driver's chromium endpoint.
+
+    Args:
+        driver: WebDriver whose ``session_id`` and ``command_executor`` are used.
+        cmd: Command name, e.g. ``"Page.addScriptToEvaluateOnNewDocument"``.
+        params: Optional dict of command parameters; defaults to an empty dict.
+
+    Returns:
+        The ``'value'`` entry of the response (``None`` when absent).
+    """
+    # A fresh dict per call avoids sharing one mutable default between calls.
+    if params is None:
+        params = {}
+    resource = "/session/%s/chromium/send_command_and_get_result" % driver.session_id
+    url = driver.command_executor._url + resource
+    body = json.dumps({'cmd': cmd, 'params': params})
+    response = driver.command_executor._request('POST', url, body)
+    return response.get('value')
+
+def add_script(driver, script):
+    """Ask the browser to evaluate *script* on every new document."""
+    payload = {"source": script}
+    send(driver, "Page.addScriptToEvaluateOnNewDocument", payload)
+
+def set_viewport_size(driver, width, height):
+    """Resize the window so the inner viewport measures *width* x *height*."""
+    # The script adds the browser chrome (outer minus inner size) to the
+    # requested viewport dimensions and returns the resulting window size.
+    outer_width, outer_height = driver.execute_script("""
+        return [window.outerWidth - window.innerWidth + arguments[0],
+          window.outerHeight - window.innerHeight + arguments[1]];
+        """, width, height)
+    driver.set_window_size(outer_width, outer_height)
+
+#docker run -d -p 4445:4444  --name p4445  --add-host=host.docker.internal:172.17.0.1  -v /dev/shm:/dev/shm   selenium/standalone-chrome
+def init_webdriver():
+    """Create a headless Chrome driver with a 1400x1000 window and return it."""
+    chrome_flags = (
+        '--ignore-certificate-errors',
+        "--no-sandbox",
+        "--headless",
+        "--disable-gpu",
+        "--disable-dev-shm-usage",
+    )
+    options = webdriver.ChromeOptions()
+    for flag in chrome_flags:
+        options.add_argument(flag)
+
+    driver = webdriver.Chrome(options=options)
+    driver.set_window_size(1400,1000)
+    return driver
+
+
+class SelGTrend:
+    """Store the raw related-searches JSON of a Google Trends explore page."""
+
+    def __init__(self):
+        db = dataset.connect('mysql://choozmo:pAssw0rd@db.ptt.cx:3306/gtrends?charset=utf8mb4')
+        self.table=db['gtrend_jsraw']
+        # When True, getpage adds '&gprop=youtube' to the explore URL.
+        self.yt=False
+        self.texts = []
+        self.links = []
+        self.results = []
+        self.user_agent = 'Mozilla/5.0 (X11; Ubuntu; Linux i686; rv:64.0) Gecko/20100101 Firefox/64.0'
+        self.headers = {'User-Agent': self.user_agent}
+
+    def search(self, key):
+        """Remember *key*, join its words with '+' and run ``getpage`` on it."""
+        self.original=key
+        self.key = "+".join(key.split(" "))
+        return self.getpage(self.key)
+
+    def getpage(self,query):
+        """Load the explore page for *query* and insert each ranked list as JSON.
+
+        Every captured ``relatedsearches?`` response is inserted into
+        ``self.table`` together with ``self.original`` and the current time.
+
+        Returns:
+            Always the (empty) ``result`` list; the data goes to the database.
+        """
+        global geo
+        driver=None
+        result=[]
+        import urllib.parse
+        safe_string = urllib.parse.quote_plus(query)
+        if self.yt:
+            self.url = 'https://trends.google.com/trends/explore?date=now%207-d&geo='+geo+'&gprop=youtube&q='+safe_string
+        else:
+            self.url = 'https://trends.google.com/trends/explore?date=now%207-d&geo='+geo+'&q='+safe_string
+
+        try:
+            print(self.url)
+            driver=init_webdriver()
+            driver.get(self.url)
+            time.sleep(3)
+            driver.refresh()
+            time.sleep(4)
+
+            # Page down a few times before inspecting the captured requests.
+            ub = driver.find_element_by_css_selector('body')
+            for i in range(9):
+                ub.send_keys(Keys.PAGE_DOWN)
+                time.sleep(0.5)
+
+            for request in driver.requests:
+                print(request.url[0:60])
+                if not request.response or 'relatedsearches?' not in request.url:
+                    continue
+                print('*** parsing js:')
+                resp=request.response.body
+                try:
+                    data = gzip.decompress(resp)
+                except Exception:
+                    # Not gzip-compressed (or corrupt): fall back to the raw body.
+                    traceback.print_exc()
+                    data=resp
+
+                jstext=data.decode('utf-8')
+                print(jstext)
+
+                # The first 6 characters are skipped before JSON parsing
+                # (presumably a non-JSON response prefix -- confirm).
+                jsobj=json.loads(jstext[6:])
+                jsobj=jsobj['default']['rankedList']
+                self.table.insert({'kw':self.original,'dt':datetime.datetime.now(),'json':json.dumps(jsobj, ensure_ascii=False).encode('utf8')})
+
+                print(jsobj)
+        except Exception as e:
+            traceback.print_exc()
+            print(e)
+        finally:
+            # driver stays None when init_webdriver() itself failed.
+            if driver is not None:
+                driver.quit()
+        return result
+
+    def result(self):
+        return self.results
+
+    def gettext(self):
+        return self.texts
+
+    def getlinks(self):
+        return self.links
+
+    def clear(self):
+        """Reset the accumulated texts, links and results."""
+        self.texts = []
+        self.links = []
+        self.results = []
+
+
+#sgtrend=SelGTrend()
+#data=sgtrend.search('居家')
+#data=sgtrend.search('7-11 當機')

+ 68 - 0
gtrends/process_gtrend.py

@@ -0,0 +1,68 @@
+#import urllib.request
+import urllib
+import requests
+import traceback
+from bs4 import BeautifulSoup
+import json
+import os
+import time
+import sys
+import random
+from seleniumwire import webdriver
+from selenium.webdriver.common.by import By
+from selenium.webdriver.support.ui import WebDriverWait, Select
+from selenium.webdriver.support import expected_conditions as EC
+from selenium.webdriver.common.keys import Keys
+from selenium.webdriver.remote.webdriver import WebDriver
+import dataset
+import docker
+import datetime
+import gzip
+
+
+db = dataset.connect('mysql://choozmo:pAssw0rd@db.ptt.cx:3306/gtrends?charset=utf8mb4')
+table=db['kw_related']
+# Collect the distinct keywords first, then process them one by one.
+lst=[]
+cursor=db.query('SELECT distinct kw FROM gtrends.gtrend_jsraw order by id desc')
+for c in cursor:
+    lst.append(c['kw'])
+
+for l in lst:
+    fullkw=l
+    # Dict keyed by related query; used to de-duplicate across all raw rows.
+    alldict={}
+
+    # Bind the keyword as a query parameter: keywords containing quotes used
+    # to break (or alter) the string-concatenated SQL.
+    cursor=db.query('SELECT * FROM gtrends.gtrend_jsraw where kw=:kw  order by id desc', kw=fullkw)
+    for c in cursor:
+        jsobj=json.loads(c['json'])
+        for j in jsobj:
+            for kw in j['rankedKeyword']:
+                if 'query' in kw:
+                    print(kw['query'])
+                    alldict[kw['query']]=1
+    for k in alldict:
+        try:
+            table.insert({'original':fullkw,'kw':k})
+        except Exception:
+            # Best effort: a failed insert is reported and the loop goes on.
+            print('except')
+        print(k)
+
+    print(alldict)
+#        break
+#        print(j['title']['query'])
+#        for a in j['articles']:
+#            print(a['title'])
+#            if a.get('image')!= None:
+#                print(a['image'])
+#                print(a['image']['imageUrl'])
+##                print(a['image']['newsUrl'])
+#        for r in j['relatedQueries']:
+#            print("-->" +r['query'])
+
+
+

+ 49 - 0
gtrends/process_trends.py

@@ -0,0 +1,49 @@
+#import urllib.request
+import urllib
+import requests
+import traceback
+from bs4 import BeautifulSoup
+import json
+import os
+import time
+import sys
+import random
+from seleniumwire import webdriver
+from selenium.webdriver.common.by import By
+from selenium.webdriver.support.ui import WebDriverWait, Select
+from selenium.webdriver.support import expected_conditions as EC
+from selenium.webdriver.common.keys import Keys
+from selenium.webdriver.remote.webdriver import WebDriver
+import dataset
+import docker
+import datetime
+import gzip
+
+
+db = dataset.connect('mysql://choozmo:pAssw0rd@db.ptt.cx:3306/gtrends?charset=utf8mb4')
+table=db['trending_search_flat']
+cursor=db.query('SELECT * FROM gtrends.trending_search_json order by id desc limit 24')
+
+# Flatten each stored JSON document into one row per article that has an image.
+for row in cursor:
+    js=row['json']
+    cid=row['id']
+    dt=row['dt']
+    for trend in json.loads(js):
+        print(trend['title']['query'])
+        qtitle=trend['title']['query']
+        for article in trend['articles']:
+            print(article['title'])
+            atitle=article['title']
+            # Articles without an 'image' entry are printed but not stored.
+            if article.get('image') is not None:
+                image=article['image']
+                print(image['imageUrl'])
+                print(image['newsUrl'])
+                aimg=image['imageUrl']
+                aurl=image['newsUrl']
+                table.insert({'cid':cid,'qtitle':qtitle,'atitle':atitle,'aimg':aimg,'aurl':aurl,'dt':dt})
+        for related in trend['relatedQueries']:
+            print("-->" +related['query'])
+
+
+

+ 161 - 0
gtrends/tredning_search.py

@@ -0,0 +1,161 @@
+#import urllib.request
+import urllib
+import requests
+import traceback
+from bs4 import BeautifulSoup
+import json
+import os
+import time
+import sys
+import random
+from seleniumwire import webdriver
+from selenium.webdriver.common.by import By
+from selenium.webdriver.support.ui import WebDriverWait, Select
+from selenium.webdriver.support import expected_conditions as EC
+from selenium.webdriver.common.keys import Keys
+from selenium.webdriver.remote.webdriver import WebDriver
+import dataset
+import docker
+import datetime
+import gzip
+#from fp.fp import FreeProxy
+
+
+def send(driver, cmd, params=None):
+    """POST a raw DevTools command to the driver's chromium endpoint.
+
+    Args:
+        driver: WebDriver whose ``session_id`` and ``command_executor`` are used.
+        cmd: Command name, e.g. ``"Page.addScriptToEvaluateOnNewDocument"``.
+        params: Optional dict of command parameters; defaults to an empty dict.
+
+    Returns:
+        The ``'value'`` entry of the response (``None`` when absent).
+    """
+    # A fresh dict per call avoids sharing one mutable default between calls.
+    if params is None:
+        params = {}
+    resource = "/session/%s/chromium/send_command_and_get_result" % driver.session_id
+    url = driver.command_executor._url + resource
+    body = json.dumps({'cmd': cmd, 'params': params})
+    response = driver.command_executor._request('POST', url, body)
+    return response.get('value')
+
+def add_script(driver, script):
+    """Ask the browser to evaluate *script* on every new document."""
+    payload = {"source": script}
+    send(driver, "Page.addScriptToEvaluateOnNewDocument", payload)
+
+def set_viewport_size(driver, width, height):
+    """Resize the window so the inner viewport measures *width* x *height*."""
+    # The script adds the browser chrome (outer minus inner size) to the
+    # requested viewport dimensions and returns the resulting window size.
+    outer_width, outer_height = driver.execute_script("""
+        return [window.outerWidth - window.innerWidth + arguments[0],
+          window.outerHeight - window.innerHeight + arguments[1]];
+        """, width, height)
+    driver.set_window_size(outer_width, outer_height)
+
+
+def init_webdriver():
+    """Create a headless, incognito Chrome driver and return it."""
+    chrome_flags = (
+        "--no-sandbox",
+        "--disable-dev-shm-usage",
+        "--headless",
+        "--incognito",
+    )
+    options = webdriver.ChromeOptions()
+    for flag in chrome_flags:
+        options.add_argument(flag)
+
+    # A local Chrome is started directly; no remote Selenium hub is used.
+    return webdriver.Chrome(options=options)
+
+
+class SelGTrend:
+    """Fetch the daily trending searches JSON from Google Trends."""
+
+    def __init__(self):
+        self.texts = []
+        self.links = []
+        self.results = []
+        self.user_agent = 'Mozilla/5.0 (X11; Ubuntu; Linux i686; rv:64.0) Gecko/20100101 Firefox/64.0'
+        self.headers = {'User-Agent': self.user_agent}
+
+    def search(self, key):
+        """Record *key* (words joined with '+') and fetch the trending page.
+
+        ``getpage`` does not use the key; it is called with its default region.
+        """
+        self.key = "+".join(key.split(" "))
+        return self.getpage()
+
+    def getpage(self, geo='TW'):
+        """Load the daily trending searches page for region *geo*.
+
+        Returns:
+            The parsed JSON of the first ``dailytrends?`` response, or an
+            empty list when no such response was captured or an error occurred.
+        """
+        driver=None
+        result=[]
+        self.url = 'https://trends.google.com/trends/trendingsearches/daily?geo='+geo
+        try:
+            print(self.url)
+            driver=init_webdriver()
+            driver.get(self.url)
+            time.sleep(5)
+
+            for request in driver.requests:
+                print(request.url[0:60])
+                if not request.response or 'dailytrends?' not in request.url:
+                    continue
+                print('*** parsing js:')
+                resp=request.response.body
+                try:
+                    data = gzip.decompress(resp)
+                except Exception:
+                    # Not gzip-compressed (or corrupt): fall back to the raw body.
+                    traceback.print_exc()
+                    data=resp
+
+                jstext=data.decode('utf-8')
+                # The first 6 characters are skipped before JSON parsing
+                # (presumably a non-JSON response prefix -- confirm).
+                return json.loads(jstext[6:])
+        except Exception as e:
+            traceback.print_exc()
+            print(e)
+        finally:
+            # Close the browser on every path, including the early return above.
+            if driver is not None:
+                driver.quit()
+        return result
+
+    def result(self):
+        return self.results
+
+    def gettext(self):
+        return self.texts
+
+    def getlinks(self):
+        return self.links
+
+    def clear(self):
+        """Reset the accumulated texts, links and results."""
+        self.texts = []
+        self.links = []
+        self.results = []
+
+
+def save_to_db(js):
+    """Store the first day's trending searches from *js* as one JSON row.
+
+    Args:
+        js: Parsed daily-trends response; an empty value (for example the
+            empty list returned when scraping failed) is skipped.
+    """
+    if not js:
+        # Nothing was scraped; indexing an empty list below would raise.
+        print('no trending data to save')
+        return
+    db = dataset.connect('mysql://choozmo:pAssw0rd@db.ptt.cx:3306/gtrends?charset=utf8mb4')
+    table = db['trending_search_json']
+    js=js['default']['trendingSearchesDays'][0]['trendingSearches']
+    try:
+        table.insert({'dt':datetime.datetime.now(),'json':json.dumps(js, ensure_ascii=False).encode('utf8')})
+    except Exception:
+        # Insert failures are reported as duplicates, as before.
+        print('dup')
+#    for j in json:
+#        print(j['title'])
+##        print(j['formattedTraffic'])
+#        print(j['relatedQueries'])
+#        if j.get('source') is not None:
+#            print(j['source'])
+##        print(json.dumps(j['image']))
+#        print(j['snippet'])
+
+#        print(j)
+#        table.insert(j)
+
+
+# Runs on import/execution: fetch the daily trending searches for region 'TW'
+# and store them with save_to_db.
+geo='TW'
+sgtrend=SelGTrend()
+result=sgtrend.getpage(geo)
+#print(result)
+save_to_db(result)
+#time.sleep(9999)

+ 18 - 0
gtrends/trending_to_gtrend.py

@@ -0,0 +1,18 @@
+import gtrendtest_jsraw
+import dataset
+import time
+
+db = dataset.connect('mysql://choozmo:pAssw0rd@db.ptt.cx:3306/gtrends?charset=utf8mb4')
+# Trending titles that do not yet appear as 'original' in kw_related.
+cursor=db.query('SELECT distinct qtitle FROM gtrends.trending_search_flat where qtitle not in (select original from kw_related)')
+# Read the whole result set into a list before the scraping loop starts.
+lst=[]
+for c in cursor:
+    print(c['qtitle'])
+    lst.append(c['qtitle'])
+
+for l in lst:
+    # A new SelGTrend (and with it a new DB connection) is created per title.
+    sgtrend=gtrendtest_jsraw.SelGTrend()
+    data=sgtrend.search(l)
+    # Pause 5 seconds between consecutive searches.
+    time.sleep(5)
+
+#data=sgtrend.search('居家')
+#data=sgtrend.search('7-11 當機')

+ 4 - 2
tests/rpc_key_test.py

@@ -6,11 +6,13 @@ import traceback
 s1={'ip':'192.168.192.199','names':['poi1','poi2','poi3','poi4']} #MBA_HOME
 #s2={'ip':'192.168.192.58','names':['poi1','poi2','poi3']} #RDP_HOME
 s3={'ip':'192.168.192.146','names':['poi1','poi2']} #TOSHIBA
-s4={'ip':'192.168.192.45','names':['poi1','poi2']} #public1
+s4={'ip':'192.168.192.45','names':['poi1','poi2','poi3']} #public1
 #s5={'ip':'192.168.192.156','names':['seo1','seo2']} #32g
 s5={'ip':'192.168.192.156','names':['poi1','poi2','poi3','poi4','poi5','poi6']} #32g
 #s5={'ip':'192.168.192.156','names':['hhhclick1','hhhclick2','seo1','seo2']}
-s6={'ip':'192.168.192.9','names':['poi1','poi2','poi3','poi4','poi5','poi6','poi7','poi8','poi9','poi10','poi11','poi12','poi13','poi14','poi15','poi16']} #64G
+#s6={'ip':'192.168.192.9','names':['poi1','poi2','poi3','poi4','poi5','poi6','poi7','poi8','poi9','poi10','poi11','poi12','poi13','poi14','poi15','poi16']} #64G
+s6={'ip':'192.168.192.9','names':['poi1','poi2','poi3','poi4','poi5']} #64G
+
 s7={'ip':'192.168.192.139','names':['poi1','poi2','poi3','poi4','poi5','poi6','poi7','poi8','poi9','poi10','poi11','poi12','poi13','poi14','poi15','poi16']} #alston1
 s8={'ip':'192.168.192.43','names':['poi1','poi2','poi3','poi4','poi5','poi6','poi7','poi8','poi9','poi10','poi11','poi12','poi13','poi14','poi15','poi16']} #alston2