Jared 2 года назад
Родитель
Commit
a5923080e2
3 изменённых файла: 34 добавлено и 18 удалено
  1. 29 15
      gtrends/process_gtrend.py
  2. 4 2
      gtrends/tredning_search.py
  3. 1 1
      gtrends/trending_to_gtrend.py

+ 29 - 15
gtrends/process_gtrend.py

@@ -19,23 +19,37 @@ import docker
 import datetime
 import gzip
 
-alldict={}
-fullkw='7-11+當機'
+
 db = dataset.connect('mysql://choozmo:pAssw0rd@db.ptt.cx:3306/gtrends?charset=utf8mb4')
-cursor=db.query('SELECT * FROM gtrends.gtrend_jsraw where kw="'+fullkw+'"  order by id desc limit 5')
+table=db['kw_related']
+lst=[]
+cursor=db.query('SELECT distinct kw FROM gtrends.gtrend_jsraw order by id desc')
 for c in cursor:
-    js=c['json']
-    jsobj=json.loads(js)
-#    jsobj=jsobj['rankedKeyword']
-    for j in jsobj:
-        kws=j['rankedKeyword']
-        for kw in kws:
-            if 'query' in kw:
-                print(kw['query'])
-                alldict[kw['query']]=1
-                if len(alldict)>=5:
-                    break
-print(alldict)
+    lst.append(c['kw'])
+
+for l in lst:
+    fullkw=l
+    alldict={}
+
+    cursor=db.query('SELECT * FROM gtrends.gtrend_jsraw where kw="'+fullkw+'"  order by id desc')
+    for c in cursor:
+        js=c['json']
+        jsobj=json.loads(js)
+    #    jsobj=jsobj['rankedKeyword']
+        for j in jsobj:
+            kws=j['rankedKeyword']
+            for kw in kws:
+                if 'query' in kw:
+                    print(kw['query'])
+                    alldict[kw['query']]=1
+    for k,v in alldict.items():
+        table.insert({'original':fullkw,'kw':k})
+        print(k)
+
+
+#                    if len(alldict)>=5:
+#                        break
+    print(alldict)
 #        break
 #        print(j['title']['query'])
 #        for a in j['articles']:

+ 4 - 2
gtrends/tredning_search.py

@@ -136,8 +136,10 @@ def save_to_db(js):
     db = dataset.connect('mysql://choozmo:pAssw0rd@db.ptt.cx:3306/gtrends?charset=utf8mb4')
     table = db['trending_search_json']
     js=js['default']['trendingSearchesDays'][0]['trendingSearches']
-    table.insert({'dt':datetime.datetime.now(),'json':json.dumps(js, ensure_ascii=False).encode('utf8')})
-    
+    try:
+        table.insert({'dt':datetime.datetime.now(),'json':json.dumps(js, ensure_ascii=False).encode('utf8')})
+    except:
+        print('dup')
 #    for j in json:
 #        print(j['title'])
 ##        print(j['formattedTraffic'])

+ 1 - 1
gtrends/trending_to_gtrend.py

@@ -3,7 +3,7 @@ import dataset
 import time
 
 db = dataset.connect('mysql://choozmo:pAssw0rd@db.ptt.cx:3306/gtrends?charset=utf8mb4')
-cursor=db.query('SELECT distinct qtitle FROM gtrends.trending_search_flat')
+cursor=db.query('SELECT distinct qtitle FROM gtrends.trending_search_flat where qtitle not in (select original from kw_related)')
 lst=[]
 for c in cursor:
     print(c['qtitle'])