Jared hai 2 anos
pai
achega
b0c6a225c4
Modificáronse 3 ficheiros con 32 adicións e 6 borrados
  1. 4 3
      gtrends/gtrendtest_jsraw.py
  2. 10 3
      gtrends/process_trends.py
  3. 18 0
      gtrends/trending_to_gtrend.py

+ 4 - 3
gtrends/gtrendtest_jsraw.py

@@ -75,6 +75,7 @@ class SelGTrend:
         self.headers = {'User-Agent': self.user_agent}
 
     def search(self, key):
+        self.original=key
         self.key = "+".join(key.split(" "))
         return self.getpage(self.key)
 
@@ -125,7 +126,7 @@ class SelGTrend:
 
                         jsobj=json.loads(jstext[6:])
                         jsobj=jsobj['default']['rankedList']
-                        self.table.insert({'kw':self.key,'dt':datetime.datetime.now(),'json':json.dumps(jsobj, ensure_ascii=False).encode('utf8')})
+                        self.table.insert({'kw':self.original,'dt':datetime.datetime.now(),'json':json.dumps(jsobj, ensure_ascii=False).encode('utf8')})
 
 
                         print(jsobj)
@@ -152,6 +153,6 @@ class SelGTrend:
         self.results = []
 
 
-sgtrend=SelGTrend()
+#sgtrend=SelGTrend()
 #data=sgtrend.search('居家')
-data=sgtrend.search('7-11 當機')
+#data=sgtrend.search('7-11 當機')

+ 10 - 3
gtrends/process_trends.py

@@ -21,20 +21,27 @@ import gzip
 
 
 db = dataset.connect('mysql://choozmo:pAssw0rd@db.ptt.cx:3306/gtrends?charset=utf8mb4')
-cursor=db.query('SELECT * FROM gtrends.trending_search_json order by id desc limit 1')
+table=db['trending_search_flat']
+cursor=db.query('SELECT * FROM gtrends.trending_search_json order by id asc')
+
 for c in cursor:
     js=c['json']
+    cid=c['id']
+    dt=c['dt']
     jsobj=json.loads(js)
     for j in jsobj:
-#        print(j)
-#        break
         print(j['title']['query'])
+        qtitle=j['title']['query']
         for a in j['articles']:
             print(a['title'])
+            atitle=a['title']
             if a.get('image')!= None:
 #                print(a['image'])
                 print(a['image']['imageUrl'])
                 print(a['image']['newsUrl'])
+                aimg=a['image']['imageUrl']
+                aurl=a['image']['newsUrl']
+                table.insert({'cid':cid,'qtitle':qtitle,'atitle':atitle,'aimg':aimg,'aurl':aurl,'dt':dt})
         for r in j['relatedQueries']:
             print("-->" +r['query'])
 

+ 18 - 0
gtrends/trending_to_gtrend.py

@@ -0,0 +1,18 @@
+import gtrendtest_jsraw
+import dataset
+import time
+
+db = dataset.connect('mysql://choozmo:pAssw0rd@db.ptt.cx:3306/gtrends?charset=utf8mb4')
+cursor=db.query('SELECT distinct qtitle FROM gtrends.trending_search_flat')
+lst=[]
+for c in cursor:
+    print(c['qtitle'])
+    lst.append(c['qtitle'])
+
+for l in lst:
+    sgtrend=gtrendtest_jsraw.SelGTrend()
+    data=sgtrend.search(l)
+    time.sleep(5)
+
+#data=sgtrend.search('居家')
+#data=sgtrend.search('7-11 當機')