Prechádzať zdrojové kódy

Merge branch 'master' of http://git.choozmo.com:3000/choozmo/kw_tools

Jared pred 2 rokmi
rodič
commit
d9344e2892
1 zmenený súbor s 58 pridaniami a 84 odobraniami
  1. 58 84
      choozmo/gsc_tree.py

+ 58 - 84
choozmo/gsc_tree.py

@@ -1,110 +1,84 @@
-import traceback
+import searchconsole
 import dataset
-import codecs
-import sys
-import pickle
 import os
-import searchconsole
-import pandas as pd
 import networkx as nx
-#import pysftp
-import codecs
-import pyvis
-import sys
-import pickle
-import os
-import searchconsole
 from pyvis.network import Network
-import jieba
-
-
-#db = dataset.connect('mysql://choozmo:pAssw0rd@127.0.0.1:3306/hhh?charset=utf8mb4')
-#db = dataset.connect('mysql://choozmo:pAssw0rd@db.ptt.cx:3306/hhh?charset=utf8mb4')
+import sqlite3
 
-#db.begin()
 db = dataset.connect('sqlite:///:memory:')
-table=db['tmp']
-#table=db['gsc_page_query_year']
-#pname='korea'
-rid=0
+table = db['tmp']
+# table=db['gsc_page_query_year']
+# pname='korea'
+rid = 0
+
 
 def checkig():
     global instl
     global table
     global pname
     global rid
-    lst=[]
-    cntdict={}
-    codelist={}
-    idx=0
-    flag_break=False
-
-    fname=os.path.abspath(__file__)
-    elmts=fname.split(os.path.sep)
-    path2=os.path.sep.join(elmts[0:-1])
-    keysdir=path2+os.path.sep+'../keys'+os.path.sep
-
-    account = searchconsole.authenticate(client_config='c:/keys/client_secret.json',credentials='c:/keys/credentials.json')
-#    account = searchconsole.authenticate(client_config='C:\\gitlab\\kw_tools\\kw_tools\\hhh\\keys\\client_secret.json',credentials='C:\\gitlab\\kw_tools\\kw_tools\\hhh\\keys\\credentials.json')
+    lst = []
+    cntdict = {}
+    codelist = {}
+    idx = 0
+    flag_break = False
+    
+    fname = os.path.abspath(__file__)
+    elmts = fname.split(os.path.sep)
+    path2 = os.path.sep.join(elmts[0:-1])
+    keysdir = path2 + os.path.sep + '../keys' + os.path.sep
+    
+    account = searchconsole.authenticate(client_config='c:/keys/client_secret.json',
+                                         credentials='c:/keys/credentials.json')
+    # webproperty = account['sc-domain:www.yogoclean.com']
+    webproperty = account['https://www.yogoclean.com/']
+    print(webproperty)
+    report = webproperty.query.range('today', months=-6).dimension('page', 'query').get()
+    print(report)
+    # report = webproperty.query.range('2022-05-28', '2022-05-29').dimension('page', 'query').get()
+    # account = searchconsole.authenticate(client_config='C:\\gitlab\\kw_tools\\kw_tools\\hhh\\keys\\client_secret.json',credentials='C:\\gitlab\\kw_tools\\kw_tools\\hhh\\keys\\credentials.json')
     G = nx.Graph()
-
-#    webproperty = account['https://ipromise.com.tw/']
-#    webproperty = account['sc-domain:face8ook.org']
-#    webproperty = account['sc-domain:hhh.com.tw']
-#   webproperty = account['sc-domain:hhh.com.tw']
-
-#    webproperty = account['https://www.damanwoo.com/']
-    webproperty = account['https://innews.com.tw/']
-
-#    report=webproperty.query.range('2021-03-01', '2021-06-17').dimension('page','query').get()
-#    report=webproperty.query.range('2021-06-01', '2021-06-17').dimension('page','query').get()
-#    report=webproperty.query.range('2020-06-01', '2021-06-22').dimension('page','query').filter('page', '/designers/cases/(491|31|293|278|31|24|594|356|307|491|33|385)', 'equals').get()
-#    report=webproperty.query.range('2020-03-01', '2021-06-22').dimension('page','query').filter('page', '/designers/cases/'+pgnum, 'contains').get()
-#    report=webproperty.query.range('2020-03-01', '2021-06-22').dimension('page','query').filter('page', '/designers/cases/'+pgnum, 'contains').get()
-    report=webproperty.query.range('2022-01-01', '2022-04-16').dimension('page','query').get()
-
-    result=[]
-    rdict={}
-    total_idx=0
-
+    
+    result = []
+    rdict = {}
+    total_idx = 0
     for r in report.rows:
-        if 'hhh.com.tw/designers/cases/' not in r[0]:
-            continue
+        print(r)
+        print(rdict)
         if rdict.get(r[0]) is None:
-            total_idx+=1
-            rid=total_idx
-            rdict[r[0]]=rid
+            total_idx += 1
+            rid = total_idx
+            rdict[r[0]] = rid
         else:
-            rid=rdict[r[0]]
-        entry={'page':r[0],'query':r[1],'rid':rid}
+            rid = rdict[r[0]]
+        entry = {'page': r[0], 'query': r[1], 'rid': rid}
         result.append(entry)
-
+    
     print('list done')
-
     for r in result:
         table.insert(r)
     db.commit()
     print('db done')
-
-#    cursor=db.query('select query as q,page as url,rid from tmp where rid in (select rid from (select rid,count(*) from tmp group by rid having count(*) > 2 and count(*) < 6) as tbl1) order by rid ')
-    cursor=db.query('select query as q,page as url,rid from tmp order by rid ')
-
-    riddict={}
-    prev=''
-    curnode=''
-    cururl=''
-
-    total_idx=0
-    for c in cursor:
-        G.add_edge(c['q'],c['rid'],weight=3,width=3,borderwidth=3)
-    remove=[]
+    
+    cursor = db.query(
+        'select query as q,page as url,rid from tmp where rid in (select rid from (select rid,count(*) from tmp group by rid having count(*) > 2 and count(*) < 6) as tbl1) order by rid ')
+    # select_data = cur.execute('select q,url,rid from tmp order by rid ')
+    
+    riddict = {}
+    prev = ''
+    curnode = ''
+    cururl = ''
+    
+    total_idx = 0
+    for data in cursor:
+        G.add_edge(data[0], data[2], weight=3, width=3, borderwidth=3)
+    remove = []
     G.remove_edges_from(nx.selfloop_edges(G))
-    G2=G
-    pyG = Network(height="600px", width="100%",bgcolor="#444444",font_color="white")
-
+    G2 = G
+    pyG = Network(height="600px", width="100%", bgcolor="#444444", font_color="white")
+    
     pyG.from_nx(G2)
-    pyG.show('news.html')
-
+    pyG.show('news15.html')
 
-r=checkig()
 
+r = checkig()